Merge pull request #1759 from ilya-lavrenov:ocl_distanceToCenters
This commit is contained in:
@@ -160,63 +160,66 @@ static void generateCentersPP(const Mat& _data, Mat& _out_centers,
|
||||
}
|
||||
}
|
||||
|
||||
void cv::ocl::distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat &centers, int distType, const oclMat &indices)
|
||||
void cv::ocl::distanceToCenters(const oclMat &src, const oclMat &centers, Mat &dists, Mat &labels, int distType)
|
||||
{
|
||||
CV_Assert(src.cols*src.oclchannels() == centers.cols*centers.oclchannels());
|
||||
CV_Assert(src.cols * src.channels() == centers.cols * centers.channels());
|
||||
CV_Assert(src.depth() == CV_32F && centers.depth() == CV_32F);
|
||||
bool is_label_row_major = false;
|
||||
ensureSizeIsEnough(1, src.rows, CV_32FC1, dists);
|
||||
if(labels.empty() || (!labels.empty() && labels.rows == src.rows && labels.cols == 1))
|
||||
{
|
||||
ensureSizeIsEnough(src.rows, 1, CV_32SC1, labels);
|
||||
is_label_row_major = true;
|
||||
}
|
||||
CV_Assert(distType == NORM_L1 || distType == NORM_L2SQR);
|
||||
|
||||
dists.create(src.rows, 1, CV_32FC1);
|
||||
labels.create(src.rows, 1, CV_32SC1);
|
||||
|
||||
std::stringstream build_opt_ss;
|
||||
build_opt_ss
|
||||
<< (distType == NORM_L1 ? "-D L1_DIST" : "-D L2SQR_DIST")
|
||||
<< (indices.empty() ? "" : " -D USE_INDEX");
|
||||
build_opt_ss << (distType == NORM_L1 ? "-D L1_DIST" : "-D L2SQR_DIST");
|
||||
|
||||
String build_opt = build_opt_ss.str();
|
||||
int src_step = src.step / src.elemSize1();
|
||||
int centers_step = centers.step / centers.elemSize1();
|
||||
int feature_width = centers.cols * centers.oclchannels();
|
||||
int src_offset = src.offset / src.elemSize1();
|
||||
int centers_offset = centers.offset / centers.elemSize1();
|
||||
|
||||
const int src_step = (int)(src.oclchannels() * src.step / src.elemSize());
|
||||
const int centers_step = (int)(centers.oclchannels() * centers.step / centers.elemSize());
|
||||
|
||||
const int colsNumb = centers.cols*centers.oclchannels();
|
||||
|
||||
const int label_step = is_label_row_major ? (int)(labels.step / labels.elemSize()) : 1;
|
||||
String kernelname = "distanceToCenters";
|
||||
|
||||
const int number_of_input = indices.empty() ? src.rows : indices.size().area();
|
||||
|
||||
const int src_offset = (int)src.offset/src.elemSize();
|
||||
const int centers_offset = (int)centers.offset/centers.elemSize();
|
||||
|
||||
size_t globalThreads[3] = {number_of_input, 1, 1};
|
||||
int all_dist_count = src.rows * centers.rows;
|
||||
oclMat all_dist(1, all_dist_count, CV_32FC1);
|
||||
|
||||
vector<pair<size_t, const void *> > args;
|
||||
args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
|
||||
args.push_back(make_pair(sizeof(cl_mem), (void *)&centers.data));
|
||||
if(!indices.empty())
|
||||
{
|
||||
args.push_back(make_pair(sizeof(cl_mem), (void *)&indices.data));
|
||||
}
|
||||
args.push_back(make_pair(sizeof(cl_mem), (void *)&labels.data));
|
||||
args.push_back(make_pair(sizeof(cl_mem), (void *)&dists.data));
|
||||
args.push_back(make_pair(sizeof(cl_int), (void *)&colsNumb));
|
||||
args.push_back(make_pair(sizeof(cl_mem), (void *)&all_dist.data));
|
||||
|
||||
args.push_back(make_pair(sizeof(cl_int), (void *)&feature_width));
|
||||
args.push_back(make_pair(sizeof(cl_int), (void *)&src_step));
|
||||
args.push_back(make_pair(sizeof(cl_int), (void *)&centers_step));
|
||||
args.push_back(make_pair(sizeof(cl_int), (void *)&label_step));
|
||||
args.push_back(make_pair(sizeof(cl_int), (void *)&number_of_input));
|
||||
args.push_back(make_pair(sizeof(cl_int), (void *)&src.rows));
|
||||
args.push_back(make_pair(sizeof(cl_int), (void *)&centers.rows));
|
||||
|
||||
args.push_back(make_pair(sizeof(cl_int), (void *)&src_offset));
|
||||
args.push_back(make_pair(sizeof(cl_int), (void *)&centers_offset));
|
||||
|
||||
size_t globalThreads[3] = { all_dist_count, 1, 1 };
|
||||
|
||||
openCLExecuteKernel(Context::getContext(), &kmeans_kernel,
|
||||
kernelname, globalThreads, NULL, args, -1, -1, build_opt.c_str());
|
||||
"distanceToCenters", globalThreads, NULL, args, -1, -1, build_opt_ss.str().c_str());
|
||||
|
||||
Mat all_dist_cpu;
|
||||
all_dist.download(all_dist_cpu);
|
||||
|
||||
for (int i = 0; i < src.rows; ++i)
|
||||
{
|
||||
Point p;
|
||||
double minVal;
|
||||
|
||||
Rect roi(i * centers.rows, 0, centers.rows, 1);
|
||||
Mat hdr(all_dist_cpu, roi);
|
||||
|
||||
cv::minMaxLoc(hdr, &minVal, NULL, &p);
|
||||
|
||||
dists.at<float>(i, 0) = static_cast<float>(minVal);
|
||||
labels.at<int>(i, 0) = p.x;
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////k - means /////////////////////////////////////////////////////////
|
||||
|
||||
double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels,
|
||||
TermCriteria criteria, int attempts, int flags, oclMat &_centers)
|
||||
{
|
||||
@@ -429,28 +432,19 @@ double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels,
|
||||
break;
|
||||
|
||||
// assign labels
|
||||
oclMat _dists(1, N, CV_64F);
|
||||
|
||||
_bestLabels.upload(_labels);
|
||||
Mat dists(1, N, CV_64F);
|
||||
_centers.upload(centers);
|
||||
distanceToCenters(_src, _centers, dists, _labels);
|
||||
_bestLabels.upload(_labels);
|
||||
|
||||
distanceToCenters(_dists, _bestLabels, _src, _centers);
|
||||
|
||||
Mat dists;
|
||||
_dists.download(dists);
|
||||
_bestLabels.download(_labels);
|
||||
float* dist = dists.ptr<float>(0);
|
||||
compactness = 0;
|
||||
for( i = 0; i < N; i++ )
|
||||
{
|
||||
compactness += (double)dist[i];
|
||||
}
|
||||
compactness += (double)dist[i];
|
||||
}
|
||||
|
||||
if( compactness < best_compactness )
|
||||
{
|
||||
best_compactness = compactness;
|
||||
}
|
||||
}
|
||||
|
||||
return best_compactness;
|
||||
|
||||
@@ -44,81 +44,64 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifdef L1_DIST
|
||||
# define DISTANCE(A, B) fabs((A) - (B))
|
||||
#elif defined L2SQR_DIST
|
||||
# define DISTANCE(A, B) ((A) - (B)) * ((A) - (B))
|
||||
#else
|
||||
# define DISTANCE(A, B) ((A) - (B)) * ((A) - (B))
|
||||
#endif
|
||||
|
||||
inline float dist(__global const float * center, __global const float * src, int feature_cols)
|
||||
static float distance_(__global const float * center, __global const float * src, int feature_length)
|
||||
{
|
||||
float res = 0;
|
||||
float4 tmp4;
|
||||
int i;
|
||||
for(i = 0; i < feature_cols / 4; i += 4, center += 4, src += 4)
|
||||
{
|
||||
tmp4 = vload4(0, center) - vload4(0, src);
|
||||
float4 v0, v1, v2;
|
||||
int i = 0;
|
||||
|
||||
#ifdef L1_DIST
|
||||
tmp4 = fabs(tmp4);
|
||||
#else
|
||||
tmp4 *= tmp4;
|
||||
float4 sum = (float4)(0.0f);
|
||||
#endif
|
||||
|
||||
for ( ; i <= feature_length - 4; i += 4)
|
||||
{
|
||||
v0 = vload4(0, center + i);
|
||||
v1 = vload4(0, src + i);
|
||||
v2 = v1 - v0;
|
||||
#ifdef L1_DIST
|
||||
v0 = fabs(v2);
|
||||
sum += v0;
|
||||
#else
|
||||
res += dot(v2, v2);
|
||||
#endif
|
||||
res += tmp4.x + tmp4.y + tmp4.z + tmp4.w;
|
||||
}
|
||||
|
||||
for(; i < feature_cols; ++i, ++center, ++src)
|
||||
#ifdef L1_DIST
|
||||
res = sum.x + sum.y + sum.z + sum.w;
|
||||
#endif
|
||||
|
||||
for ( ; i < feature_length; ++i)
|
||||
{
|
||||
res += DISTANCE(*src, *center);
|
||||
float t0 = src[i];
|
||||
float t1 = center[i];
|
||||
#ifdef L1_DIST
|
||||
res += fabs(t0 - t1);
|
||||
#else
|
||||
float t2 = t0 - t1;
|
||||
res += t2 * t2;
|
||||
#endif
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
// to be distinguished with distanceToCenters in kmeans_kernel.cl
|
||||
__kernel void distanceToCenters(
|
||||
__global const float *src,
|
||||
__global const float *centers,
|
||||
#ifdef USE_INDEX
|
||||
__global const int *indices,
|
||||
#endif
|
||||
__global int *labels,
|
||||
__global float *dists,
|
||||
int feature_cols,
|
||||
int src_step,
|
||||
int centers_step,
|
||||
int label_step,
|
||||
int input_size,
|
||||
int K,
|
||||
int offset_src,
|
||||
int offset_centers
|
||||
)
|
||||
__kernel void distanceToCenters(__global const float * src, __global const float * centers,
|
||||
__global float * dists, int feature_length,
|
||||
int src_step, int centers_step,
|
||||
int features_count, int centers_count,
|
||||
int src_offset, int centers_offset)
|
||||
{
|
||||
int gid = get_global_id(0);
|
||||
float euDist, minval;
|
||||
int minCentroid;
|
||||
if(gid >= input_size)
|
||||
|
||||
if (gid < (features_count * centers_count))
|
||||
{
|
||||
return;
|
||||
int feature_index = gid / centers_count;
|
||||
int center_index = gid % centers_count;
|
||||
|
||||
int center_idx = mad24(center_index, centers_step, centers_offset);
|
||||
int src_idx = mad24(feature_index, src_step, src_offset);
|
||||
|
||||
dists[gid] = distance_(centers + center_idx, src + src_idx, feature_length);
|
||||
}
|
||||
src += offset_src;
|
||||
centers += offset_centers;
|
||||
#ifdef USE_INDEX
|
||||
src += indices[gid] * src_step;
|
||||
#else
|
||||
src += gid * src_step;
|
||||
#endif
|
||||
minval = dist(centers, src, feature_cols);
|
||||
minCentroid = 0;
|
||||
for(int i = 1 ; i < K; i++)
|
||||
{
|
||||
euDist = dist(centers + i * centers_step, src, feature_cols);
|
||||
if(euDist < minval)
|
||||
{
|
||||
minval = euDist;
|
||||
minCentroid = i;
|
||||
}
|
||||
}
|
||||
labels[gid * label_step] = minCentroid;
|
||||
dists[gid] = minval;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user