diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index fa11177c4..7dcf96fd5 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -834,6 +834,18 @@ namespace cv CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT); + /////////////////////////////////// ML /////////////////////////////////////////// + + //! Compute closest centers for each lines in source and lable it after center's index + // supports CV_32FC1/CV_32FC2/CV_32FC4 data type + void DistanceComputer(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat ¢ers); + + //!Does k-means procedure on GPU + // supports CV_32FC1/CV_32FC2/CV_32FC4 data type + CV_EXPORTS double kmeans(const oclMat &src, int K, oclMat &bestLabels, + TermCriteria criteria, int attemps, int flags, oclMat ¢ers); + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////CascadeClassifier////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/modules/ocl/src/kmeans.cpp b/modules/ocl/src/kmeans.cpp new file mode 100644 index 000000000..3922fd8dc --- /dev/null +++ b/modules/ocl/src/kmeans.cpp @@ -0,0 +1,471 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Xiaopeng Fu, fuxiaopeng2222@163.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include +#include "precomp.hpp" + +using namespace cv; +using namespace ocl; + +namespace cv +{ + namespace ocl + { + ////////////////////////////////////OpenCL kernel strings////////////////////////// + extern const char *kmeans_kernel; + } +} +////////////////////////////////////////////////////////////////////////// +//////////////////common///////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////// +void swap( Mat& a, Mat& b ) +{ + std::swap(a.flags, b.flags); + std::swap(a.dims, b.dims); + std::swap(a.rows, b.rows); + std::swap(a.cols, b.cols); + std::swap(a.data, b.data); + std::swap(a.refcount, b.refcount); + std::swap(a.datastart, b.datastart); + std::swap(a.dataend, b.dataend); + std::swap(a.datalimit, b.datalimit); + std::swap(a.allocator, b.allocator); + + std::swap(a.size.p, b.size.p); + std::swap(a.step.p, b.step.p); + std::swap(a.step.buf[0], b.step.buf[0]); + std::swap(a.step.buf[1], b.step.buf[1]); + + if( a.step.p == b.step.buf ) + { + a.step.p = a.step.buf; + a.size.p = &a.rows; + } + + if( b.step.p == a.step.buf ) + { + b.step.p = b.step.buf; + b.size.p = &b.rows; + } +} + +static void generateRandomCenter(const vector& box, float* center, RNG& rng) +{ + size_t j, dims = box.size(); + float margin = 1.f/dims; + for( j = 0; j < dims; j++ ) + center[j] = ((float)rng*(1.f+margin*2.f)-margin)*(box[j][1] - box[j][0]) + box[j][0]; +} + +// This class is copied from matrix.cpp in core module. +class KMeansPPDistanceComputer : public ParallelLoopBody +{ +public: + KMeansPPDistanceComputer( float *_tdist2, + const float *_data, + const float *_dist, + int _dims, + size_t _step, + size_t _stepci ) + : tdist2(_tdist2), + data(_data), + dist(_dist), + dims(_dims), + step(_step), + stepci(_stepci) { } + + void operator()( const cv::Range& range ) const + { + const int begin = range.start; + const int end = range.end; + + for ( int i = begin; i _centers(K); + int* centers = &_centers[0]; + vector _dist(N*3); + float* dist = &_dist[0], *tdist = dist + N, *tdist2 = tdist + N; + double sum0 = 0; + + centers[0] = (unsigned)rng % N; + + for( i = 0; i < N; i++ ) + { + dist[i] = normL2Sqr_(data + step*i, data + step*centers[0], dims); + sum0 += dist[i]; + } + + for( k = 1; k < K; k++ ) + { + double bestSum = DBL_MAX; + int bestCenter = -1; + + for( j = 0; j < trials; j++ ) + { + double p = (double)rng*sum0, s = 0; + for( i = 0; i < N-1; i++ ) + if( (p -= dist[i]) <= 0 ) + break; + int ci = i; + + parallel_for_(Range(0, N), + KMeansPPDistanceComputer(tdist2, data, dist, dims, step, step*ci)); + for( i = 0; i < N; i++ ) + { + s += tdist2[i]; + } + + if( s < bestSum ) + { + bestSum = s; + bestCenter = ci; + std::swap(tdist, tdist2); + } + } + centers[k] = bestCenter; + sum0 = bestSum; + std::swap(dist, tdist); + } + + for( k = 0; k < K; k++ ) + { + const float* src = data + step*centers[k]; + float* dst = _out_centers.ptr(k); + for( j = 0; j < dims; j++ ) + dst[j] = src[j]; + } +} + +void cv::ocl::DistanceComputer(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat ¢ers) +{ + //if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F) + //{ + // CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); + // return; + //} + + Context *clCxt = src.clCxt; + int labels_step = (int)(labels.step/labels.elemSize()); + string kernelname = "kmeansComputeDistance"; + int threadNum = src.rows > 256 ? 256 : src.rows; + size_t localThreads[3] = {1, threadNum, 1}; + size_t globalThreads[3] = {1, src.rows, 1}; + + vector > args; + args.push_back(make_pair(sizeof(cl_int), (void *)&labels_step)); + args.push_back(make_pair(sizeof(cl_int), (void *)¢ers.rows)); + args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data)); + args.push_back(make_pair(sizeof(cl_mem), (void *)&labels.data)); + args.push_back(make_pair(sizeof(cl_int), (void *)¢ers.cols)); + args.push_back(make_pair(sizeof(cl_int), (void *)&src.rows)); + args.push_back(make_pair(sizeof(cl_mem), (void *)¢ers.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&dists.data)); + + openCLExecuteKernel(clCxt, &kmeans_kernel, kernelname, globalThreads, localThreads, args, -1, -1, NULL); +} +///////////////////////////////////k - means ///////////////////////////////////////////////////////// +double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels, + TermCriteria criteria, int attempts, int flags, oclMat &_centers) +{ + const int SPP_TRIALS = 3; + bool isrow = _src.rows == 1 && _src.oclchannels() > 1; + int N = !isrow ? _src.rows : _src.cols; + int dims = (!isrow ? _src.cols : 1) * _src.oclchannels(); + int type = _src.depth(); + + attempts = std::max(attempts, 1); + CV_Assert(type == CV_32F && K > 0 ); + CV_Assert( N >= K ); + + Mat _labels; + if( flags & CV_KMEANS_USE_INITIAL_LABELS ) + { + CV_Assert( (_bestLabels.cols == 1 || _bestLabels.rows == 1) && + _bestLabels.cols * _bestLabels.rows == N && + _bestLabels.type() == CV_32S ); + _bestLabels.download(_labels); + } + else + { + if( !((_bestLabels.cols == 1 || _bestLabels.rows == 1) && + _bestLabels.cols * _bestLabels.rows == N && + _bestLabels.type() == CV_32S && + _bestLabels.isContinuous())) + _bestLabels.create(N, 1, CV_32S); + _labels.create(_bestLabels.size(), _bestLabels.type()); + } + int* labels = _labels.ptr(); + + Mat data; + _src.download(data); + Mat centers(K, dims, type), old_centers(K, dims, type), temp(1, dims, type); + vector counters(K); + vector _box(dims); + Vec2f* box = &_box[0]; + double best_compactness = DBL_MAX, compactness = 0; + RNG& rng = theRNG(); + int a, iter, i, j, k; + + if( criteria.type & TermCriteria::EPS ) + criteria.epsilon = std::max(criteria.epsilon, 0.); + else + criteria.epsilon = FLT_EPSILON; + criteria.epsilon *= criteria.epsilon; + + if( criteria.type & TermCriteria::COUNT ) + criteria.maxCount = std::min(std::max(criteria.maxCount, 2), 100); + else + criteria.maxCount = 100; + + if( K == 1 ) + { + attempts = 1; + criteria.maxCount = 2; + } + + const float* sample = data.ptr(); + for( j = 0; j < dims; j++ ) + box[j] = Vec2f(sample[j], sample[j]); + + for( i = 1; i < N; i++ ) + { + sample = data.ptr(i); + for( j = 0; j < dims; j++ ) + { + float v = sample[j]; + box[j][0] = std::min(box[j][0], v); + box[j][1] = std::max(box[j][1], v); + } + } + + for( a = 0; a < attempts; a++ ) + { + double max_center_shift = DBL_MAX; + for( iter = 0;; ) + { + swap(centers, old_centers); + + if( iter == 0 && (a > 0 || !(flags & KMEANS_USE_INITIAL_LABELS)) ) + { + if( flags & KMEANS_PP_CENTERS ) + generateCentersPP(data, centers, K, rng, SPP_TRIALS); + else + { + for( k = 0; k < K; k++ ) + generateRandomCenter(_box, centers.ptr(k), rng); + } + } + else + { + if( iter == 0 && a == 0 && (flags & KMEANS_USE_INITIAL_LABELS) ) + { + for( i = 0; i < N; i++ ) + CV_Assert( (unsigned)labels[i] < (unsigned)K ); + } + + // compute centers + centers = Scalar(0); + for( k = 0; k < K; k++ ) + counters[k] = 0; + + for( i = 0; i < N; i++ ) + { + sample = data.ptr(i); + k = labels[i]; + float* center = centers.ptr(k); + j=0; + #if CV_ENABLE_UNROLLED + for(; j <= dims - 4; j += 4 ) + { + float t0 = center[j] + sample[j]; + float t1 = center[j+1] + sample[j+1]; + + center[j] = t0; + center[j+1] = t1; + + t0 = center[j+2] + sample[j+2]; + t1 = center[j+3] + sample[j+3]; + + center[j+2] = t0; + center[j+3] = t1; + } + #endif + for( ; j < dims; j++ ) + center[j] += sample[j]; + counters[k]++; + } + + if( iter > 0 ) + max_center_shift = 0; + + for( k = 0; k < K; k++ ) + { + if( counters[k] != 0 ) + continue; + + // if some cluster appeared to be empty then: + // 1. find the biggest cluster + // 2. find the farthest from the center point in the biggest cluster + // 3. exclude the farthest point from the biggest cluster and form a new 1-point cluster. + int max_k = 0; + for( int k1 = 1; k1 < K; k1++ ) + { + if( counters[max_k] < counters[k1] ) + max_k = k1; + } + + double max_dist = 0; + int farthest_i = -1; + float* new_center = centers.ptr(k); + float* old_center = centers.ptr(max_k); + float* _old_center = temp.ptr(); // normalized + float scale = 1.f/counters[max_k]; + for( j = 0; j < dims; j++ ) + _old_center[j] = old_center[j]*scale; + + for( i = 0; i < N; i++ ) + { + if( labels[i] != max_k ) + continue; + sample = data.ptr(i); + double dist = normL2Sqr_(sample, _old_center, dims); + + if( max_dist <= dist ) + { + max_dist = dist; + farthest_i = i; + } + } + + counters[max_k]--; + counters[k]++; + labels[farthest_i] = k; + sample = data.ptr(farthest_i); + + for( j = 0; j < dims; j++ ) + { + old_center[j] -= sample[j]; + new_center[j] += sample[j]; + } + } + + for( k = 0; k < K; k++ ) + { + float* center = centers.ptr(k); + CV_Assert( counters[k] != 0 ); + + float scale = 1.f/counters[k]; + for( j = 0; j < dims; j++ ) + center[j] *= scale; + + if( iter > 0 ) + { + double dist = 0; + const float* old_center = old_centers.ptr(k); + for( j = 0; j < dims; j++ ) + { + double t = center[j] - old_center[j]; + dist += t*t; + } + max_center_shift = std::max(max_center_shift, dist); + } + } + } + + if( ++iter == MAX(criteria.maxCount, 2) || max_center_shift <= criteria.epsilon ) + break; + + // assign labels + oclMat _dists(1, N, CV_64F); + + _bestLabels.upload(_labels); + _centers.upload(centers); + DistanceComputer(_dists, _bestLabels, _src, _centers); + + Mat dists; + _dists.download(dists); + _bestLabels.download(_labels); + + double* dist = dists.ptr(0); + compactness = 0; + for( i = 0; i < N; i++ ) + { + compactness += dist[i]; + } + } + + if( compactness < best_compactness ) + { + best_compactness = compactness; + } + } + + return best_compactness; +} + diff --git a/modules/ocl/src/opencl/kmeans_kernel.cl b/modules/ocl/src/opencl/kmeans_kernel.cl new file mode 100644 index 000000000..d6f6c721a --- /dev/null +++ b/modules/ocl/src/opencl/kmeans_kernel.cl @@ -0,0 +1,83 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Xiaopeng Fu, fuxiaopeng2222@163.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other GpuMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +__kernel void kmeansComputeDistance( + int label_step, int K, + __global float *src, + __global int *labels, int dims, int rows, + __global float *centers, + __global float *dists) +{ + int gid = get_global_id(1); + + float dist, euDist, min; + int minCentroid; + + if(gid >= rows) + return; + + for(int i = 0 ;i < K; i++) + { + euDist = 0; + for(int j = 0; j < dims; j++) + { + dist = (src[j + gid * dims] + - centers[j + i * dims]); + euDist += dist * dist; + } + + if(i == 0) + { + min = euDist; + minCentroid = 0; + } else if(euDist < min) + { + min = euDist; + minCentroid = i; + } + } + dists[gid] = min; + labels[label_step * gid] = minCentroid; +} diff --git a/modules/ocl/test/test_kmeans.cpp b/modules/ocl/test/test_kmeans.cpp new file mode 100644 index 000000000..ebade3bbc --- /dev/null +++ b/modules/ocl/test/test_kmeans.cpp @@ -0,0 +1,162 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Erping Pang, pang_er_ping@163.com +// Xiaopeng Fu, fuxiaopeng2222@163.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +#ifdef HAVE_OPENCL + +using namespace cvtest; +using namespace testing; +using namespace std; +using namespace cv; + +#define OCL_KMEANS_USE_INITIAL_LABELS 1 +#define OCL_KMEANS_PP_CENTERS 2 + +PARAM_TEST_CASE(Kmeans, int, int, int) +{ + int type; + int K; + int flags; + cv::Mat src ; + ocl::oclMat d_src, d_dists; + + Mat labels, centers; + ocl::oclMat d_labels, d_centers; + cv::RNG rng ; + virtual void SetUp(){ + K = GET_PARAM(0); + type = GET_PARAM(1); + flags = GET_PARAM(2); + rng = TS::ptr()->get_rng(); + + // MWIDTH=256, MHEIGHT=256. defined in utility.hpp + cv::Size size = cv::Size(MWIDTH, MHEIGHT); + src.create(size, type); + int row_idx = 0; + const int max_neighbour = MHEIGHT / K - 1; + CV_Assert(K <= MWIDTH); + for(int i = 0; i < K; i++ ) + { + Mat center_row_header = src.row(row_idx); + center_row_header.setTo(0); + int nchannel = center_row_header.channels(); + for(int j = 0; j < nchannel; j++) + center_row_header.at(0, i*nchannel+j) = 50000.0; + + for(int j = 0; (j < max_neighbour) || + (i == K-1 && j < max_neighbour + MHEIGHT%K); j ++) + { + Mat cur_row_header = src.row(row_idx + 1 + j); + center_row_header.copyTo(cur_row_header); + Mat tmpmat = randomMat(rng, cur_row_header.size(), cur_row_header.type(), -200, 200, false); + cur_row_header += tmpmat; + } + row_idx += 1 + max_neighbour; + } + } +}; +TEST_P(Kmeans, Mat){ + + if(flags & KMEANS_USE_INITIAL_LABELS) + { + // inital a given labels + labels.create(src.rows, 1, CV_32S); + int *label = labels.ptr(); + for(int i = 0; i < src.rows; i++) + label[i] = rng.uniform(0, K); + d_labels.upload(labels); + } + d_src.upload(src); + + for(int j = 0; j < LOOP_TIMES; j++) + { + kmeans(src, K, labels, + TermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 100, 0), + 1, flags, centers); + + ocl::kmeans(d_src, K, d_labels, + TermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 100, 0), + 1, flags, d_centers); + + Mat dd_labels(d_labels); + Mat dd_centers(d_centers); + if(flags & KMEANS_USE_INITIAL_LABELS) + { + EXPECT_MAT_NEAR(labels, dd_labels, 0); + EXPECT_MAT_NEAR(centers, dd_centers, 1e-3); + } + else + { + int row_idx = 0; + for(int i = 0; i < K; i++) + { + // verify lables with ground truth resutls + int label = labels.at(row_idx); + int header_label = dd_labels.at(row_idx); + for(int j = 0; (j < MHEIGHT/K)||(i == K-1 && j < MHEIGHT/K+MHEIGHT%K); j++) + { + ASSERT_NEAR(labels.at(row_idx+j), label, 0); + ASSERT_NEAR(dd_labels.at(row_idx+j), header_label, 0); + } + + // verify centers + float *center = centers.ptr(label); + float *header_center = dd_centers.ptr(header_label); + for(int t = 0; t < centers.cols; t++) + ASSERT_NEAR(center[t], header_center[t], 1e-3); + + row_idx += MHEIGHT/K; + } + } + } +} +INSTANTIATE_TEST_CASE_P(OCL_ML, Kmeans, Combine( + Values(3, 5, 8), + Values(CV_32FC1, CV_32FC2, CV_32FC4), + Values(OCL_KMEANS_USE_INITIAL_LABELS/*, OCL_KMEANS_PP_CENTERS*/))); + +#endif