From b4a4a05bdc7bcf220b132b5ab5fc9fb47b4e11ec Mon Sep 17 00:00:00 2001 From: peng xiao Date: Wed, 22 May 2013 13:46:42 +0800 Subject: [PATCH] Add ocl's good features to track implementation. Additional notes with this commit: 1. Add cornerHarris_dxdy and cornerMinEigenVal_dxdy to get the interim dx and dy output of Sobel operator; 2. Add minMax_buf to allow user to reuse buffers in minMax; 3. Fix an error when either min or max pointer fed into minMax is NULL; 4. Corner sorter temporarily uses C++ STL's quick sort. A parallel selection sort in OpneCL is contained in the implementation but disabled due to poor performance at the moment. 5. Accuracy test for ocl gfft. --- modules/ocl/include/opencv2/ocl/ocl.hpp | 54 ++- .../ocl/include/opencv2/ocl/private/util.hpp | 29 +- modules/ocl/src/arithm.cpp | 40 +- modules/ocl/src/gfft.cpp | 351 ++++++++++++++++++ modules/ocl/src/imgproc.cpp | 39 +- modules/ocl/src/mcwutil.cpp | 7 +- modules/ocl/src/opencl/imgproc_gfft.cl | 276 ++++++++++++++ modules/ocl/test/test_optflow.cpp | 77 ++++ 8 files changed, 841 insertions(+), 32 deletions(-) create mode 100644 modules/ocl/src/gfft.cpp create mode 100644 modules/ocl/src/opencl/imgproc_gfft.cl diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index 1cace848c..f9fb4b44e 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -122,8 +122,9 @@ namespace cv CV_EXPORTS void setBinpath(const char *path); //The two functions below enable other opencl program to use ocl module's cl_context and cl_command_queue + //returns cl_context * CV_EXPORTS void* getoclContext(); - + //returns cl_command_queue * CV_EXPORTS void* getoclCommandQueue(); //explicit call clFinish. The global command queue will be used. @@ -461,6 +462,7 @@ namespace cv // support all C1 types CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat()); + CV_EXPORTS void minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat& buf); //! finds global minimum and maximum array elements and returns their values with locations // support all C1 types @@ -789,7 +791,11 @@ namespace cv CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum); CV_EXPORTS void integral(const oclMat &src, oclMat &sum); CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT); + CV_EXPORTS void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy, + int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT); CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT); + CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy, + int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////CascadeClassifier////////////////////////////////////////////////////////////////// @@ -1253,6 +1259,52 @@ namespace cv public: explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {} }; + + class CV_EXPORTS GoodFeaturesToTrackDetector_OCL + { + public: + explicit GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0, + int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04); + + //! return 1 rows matrix with CV_32FC2 type + void operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat()); + //! download points of type Point2f to a vector. the vector's content will be erased + void downloadPoints(const oclMat &points, vector &points_v); + + int maxCorners; + double qualityLevel; + double minDistance; + + int blockSize; + bool useHarrisDetector; + double harrisK; + void releaseMemory() + { + Dx_.release(); + Dy_.release(); + eig_.release(); + minMaxbuf_.release(); + tmpCorners_.release(); + } + private: + oclMat Dx_; + oclMat Dy_; + oclMat eig_; + oclMat minMaxbuf_; + oclMat tmpCorners_; + }; + + inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_, + int blockSize_, bool useHarrisDetector_, double harrisK_) + { + maxCorners = maxCorners_; + qualityLevel = qualityLevel_; + minDistance = minDistance_; + blockSize = blockSize_; + useHarrisDetector = useHarrisDetector_; + harrisK = harrisK_; + } + /////////////////////////////// PyrLKOpticalFlow ///////////////////////////////////// class CV_EXPORTS PyrLKOpticalFlow { diff --git a/modules/ocl/include/opencv2/ocl/private/util.hpp b/modules/ocl/include/opencv2/ocl/private/util.hpp index f3e582f48..23a3ad468 100644 --- a/modules/ocl/include/opencv2/ocl/private/util.hpp +++ b/modules/ocl/include/opencv2/ocl/private/util.hpp @@ -120,6 +120,33 @@ namespace cv cl_mem CV_EXPORTS bindTexture(const oclMat &mat); void CV_EXPORTS releaseTexture(cl_mem& texture); + //Represents an image texture object + class CV_EXPORTS TextureCL + { + public: + TextureCL(cl_mem tex, int r, int c, int t) + : tex_(tex), rows(r), cols(c), type(t) {} + ~TextureCL() + { + openCLFree(tex_); + } + operator cl_mem() + { + return tex_; + } + cl_mem const tex_; + const int rows; + const int cols; + const int type; + private: + //disable assignment + void operator=(const TextureCL&); + }; + // bind oclMat to OpenCL image textures and retunrs an TextureCL object + // note: + // for faster clamping, there is no buffer padding for the constructed texture + Ptr CV_EXPORTS bindTexturePtr(const oclMat &mat); + // returns whether the current context supports image2d_t format or not bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext()); @@ -132,7 +159,7 @@ namespace cv }; template _ty queryDeviceInfo(cl_kernel kernel = NULL); - //info should have been pre-allocated + template<> int CV_EXPORTS queryDeviceInfo(cl_kernel kernel); template<> diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index ed2515dc6..34569dc57 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -782,45 +782,55 @@ static void arithmetic_minMax_mask_run(const oclMat &src, const oclMat &mask, cl } } -template void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask) +template void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, + const oclMat &mask, oclMat &buf) { size_t groupnum = src.clCxt->computeUnits(); CV_Assert(groupnum != 0); groupnum = groupnum * 2; int vlen = 8; int dbsize = groupnum * 2 * vlen * sizeof(T) ; - Context *clCxt = src.clCxt; - cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize); - *minVal = std::numeric_limits::max() , *maxVal = -std::numeric_limits::max(); + + ensureSizeIsEnough(1, dbsize, CV_8UC1, buf); + + cl_mem buf_data = reinterpret_cast(buf.data); + if (mask.empty()) { - arithmetic_minMax_run(src, mask, dstBuffer, vlen, groupnum, "arithm_op_minMax"); + arithmetic_minMax_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax"); } else { - arithmetic_minMax_mask_run(src, mask, dstBuffer, vlen, groupnum, "arithm_op_minMax_mask"); + arithmetic_minMax_mask_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax_mask"); } - T *p = new T[groupnum * vlen * 2]; - memset(p, 0, dbsize); - openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize); - if(minVal != NULL){ + + Mat matbuf = Mat(buf); + T *p = matbuf.ptr(); + if(minVal != NULL) + { + *minVal = std::numeric_limits::max(); for(int i = 0; i < vlen * (int)groupnum; i++) { *minVal = *minVal < p[i] ? *minVal : p[i]; } } - if(maxVal != NULL){ + if(maxVal != NULL) + { + *maxVal = -std::numeric_limits::max(); for(int i = vlen * (int)groupnum; i < 2 * vlen * (int)groupnum; i++) { *maxVal = *maxVal > p[i] ? *maxVal : p[i]; } } - delete[] p; - openCLFree(dstBuffer); } -typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask); +typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf); void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask) +{ + oclMat buf; + minMax_buf(src, minVal, maxVal, mask, buf); +} +void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf) { CV_Assert(src.oclchannels() == 1); if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) @@ -840,7 +850,7 @@ void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oc }; minMaxFunc func; func = functab[src.depth()]; - func(src, minVal, maxVal, mask); + func(src, minVal, maxVal, mask, buf); } ////////////////////////////////////////////////////////////////////////////// diff --git a/modules/ocl/src/gfft.cpp b/modules/ocl/src/gfft.cpp new file mode 100644 index 000000000..af7580bd4 --- /dev/null +++ b/modules/ocl/src/gfft.cpp @@ -0,0 +1,351 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@outlook.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ +#include +#include "precomp.hpp" + +using namespace cv; +using namespace cv::ocl; + +static bool use_cpu_sorter = true; + +namespace cv +{ + namespace ocl + { + ///////////////////////////OpenCL kernel strings/////////////////////////// + extern const char *imgproc_gfft; + } +} + +namespace +{ +enum SortMethod +{ + CPU_STL, + BITONIC, + SELECTION +}; + +const int GROUP_SIZE = 256; + +template +struct Sorter +{ + //typedef EigType; +}; + +//TODO(pengx): optimize GPU sorter's performance thus CPU sorter is removed. +template<> +struct Sorter +{ + typedef oclMat EigType; + static cv::Mutex cs; + static Mat mat_eig; + + //prototype + static int clfloat2Gt(cl_float2 pt1, cl_float2 pt2) + { + float v1 = mat_eig.at(cvRound(pt1.s[1]), cvRound(pt1.s[0])); + float v2 = mat_eig.at(cvRound(pt2.s[1]), cvRound(pt2.s[0])); + return v1 > v2; + } + static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count) + { + cv::AutoLock lock(cs); + //temporarily use STL's sort function + Mat mat_corners = corners; + mat_eig = eig_tex; + std::sort(mat_corners.begin(), mat_corners.begin() + count, clfloat2Gt); + corners = mat_corners; + } +}; +cv::Mutex Sorter::cs; +cv::Mat Sorter::mat_eig; + +template<> +struct Sorter +{ + typedef TextureCL EigType; + + static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count) + { + Context * cxt = Context::getContext(); + size_t globalThreads[3] = {count / 2, 1, 1}; + size_t localThreads[3] = {GROUP_SIZE, 1, 1}; + + // 2^numStages should be equal to count or the output is invalid + int numStages = 0; + for(int i = count; i > 1; i >>= 1) + { + ++numStages; + } + const int argc = 5; + std::vector< std::pair > args(argc); + std::string kernelname = "sortCorners_bitonicSort"; + args[0] = std::make_pair(sizeof(cl_mem), (void *)&eig_tex); + args[1] = std::make_pair(sizeof(cl_mem), (void *)&corners.data); + args[2] = std::make_pair(sizeof(cl_int), (void *)&count); + for(int stage = 0; stage < numStages; ++stage) + { + args[3] = std::make_pair(sizeof(cl_int), (void *)&stage); + for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage) + { + args[4] = std::make_pair(sizeof(cl_int), (void *)&passOfStage); + openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1); + } + } + } +}; + +template<> +struct Sorter +{ + typedef TextureCL EigType; + + static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count) + { + Context * cxt = Context::getContext(); + + size_t globalThreads[3] = {count, 1, 1}; + size_t localThreads[3] = {GROUP_SIZE, 1, 1}; + + std::vector< std::pair > args; + //local + std::string kernelname = "sortCorners_selectionSortLocal"; + int lds_size = GROUP_SIZE * sizeof(cl_float2); + args.push_back( std::make_pair( sizeof(cl_mem), (void*)&eig_tex) ); + args.push_back( std::make_pair( sizeof(cl_mem), (void*)&corners.data) ); + args.push_back( std::make_pair( sizeof(cl_int), (void*)&count) ); + args.push_back( std::make_pair( lds_size, (void*)NULL) ); + + openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1); + + //final + kernelname = "sortCorners_selectionSortFinal"; + args.pop_back(); + openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1); + } +}; + +int findCorners_caller( + const TextureCL& eig, + const float threshold, + const oclMat& mask, + oclMat& corners, + const int max_count) +{ + std::vector k; + Context * cxt = Context::getContext(); + + std::vector< std::pair > args; + std::string kernelname = "findCorners"; + + const int mask_strip = mask.step / mask.elemSize1(); + + oclMat g_counter(1, 1, CV_32SC1); + g_counter.setTo(0); + + args.push_back(make_pair( sizeof(cl_mem), (void*)&eig )); + args.push_back(make_pair( sizeof(cl_mem), (void*)&mask.data )); + args.push_back(make_pair( sizeof(cl_mem), (void*)&corners.data )); + args.push_back(make_pair( sizeof(cl_int), (void*)&mask_strip)); + args.push_back(make_pair( sizeof(cl_float), (void*)&threshold )); + args.push_back(make_pair( sizeof(cl_int), (void*)&eig.rows )); + args.push_back(make_pair( sizeof(cl_int), (void*)&eig.cols )); + args.push_back(make_pair( sizeof(cl_int), (void*)&max_count )); + args.push_back(make_pair( sizeof(cl_mem), (void*)&g_counter.data )); + + size_t globalThreads[3] = {eig.cols, eig.rows, 1}; + size_t localThreads[3] = {16, 16, 1}; + + const char * opt = mask.empty() ? "" : "-D WITH_MASK"; + openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1, opt); + return std::min(Mat(g_counter).at(0), max_count); +} +}//unnamed namespace + +void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask) +{ + CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0); + CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size())); + + CV_DbgAssert(support_image2d()); + + ensureSizeIsEnough(image.size(), CV_32F, eig_); + + if (useHarrisDetector) + cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK); + else + cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3); + + double maxVal = 0; + minMax_buf(eig_, 0, &maxVal, oclMat(), minMaxbuf_); + + ensureSizeIsEnough(1, std::max(1000, static_cast(image.size().area() * 0.05)), CV_32FC2, tmpCorners_); + + Ptr eig_tex = bindTexturePtr(eig_); + int total = findCorners_caller( + *eig_tex, + static_cast(maxVal * qualityLevel), + mask, + tmpCorners_, + tmpCorners_.cols); + + if (total == 0) + { + corners.release(); + return; + } + if(use_cpu_sorter) + { + Sorter::sortCorners_caller(eig_, tmpCorners_, total); + } + else + { + //if total is power of 2 + if(((total - 1) & (total)) == 0) + { + Sorter::sortCorners_caller(*eig_tex, tmpCorners_, total); + } + else + { + Sorter::sortCorners_caller(*eig_tex, tmpCorners_, total); + } + } + + if (minDistance < 1) + { + corners = tmpCorners_(Rect(0, 0, maxCorners > 0 ? std::min(maxCorners, total) : total, 1)); + } + else + { + vector tmp(total); + downloadPoints(tmpCorners_, tmp); + + vector tmp2; + tmp2.reserve(total); + + const int cell_size = cvRound(minDistance); + const int grid_width = (image.cols + cell_size - 1) / cell_size; + const int grid_height = (image.rows + cell_size - 1) / cell_size; + + std::vector< std::vector > grid(grid_width * grid_height); + + for (int i = 0; i < total; ++i) + { + Point2f p = tmp[i]; + + bool good = true; + + int x_cell = static_cast(p.x / cell_size); + int y_cell = static_cast(p.y / cell_size); + + int x1 = x_cell - 1; + int y1 = y_cell - 1; + int x2 = x_cell + 1; + int y2 = y_cell + 1; + + // boundary check + x1 = std::max(0, x1); + y1 = std::max(0, y1); + x2 = std::min(grid_width - 1, x2); + y2 = std::min(grid_height - 1, y2); + + for (int yy = y1; yy <= y2; yy++) + { + for (int xx = x1; xx <= x2; xx++) + { + vector& m = grid[yy * grid_width + xx]; + + if (!m.empty()) + { + for(size_t j = 0; j < m.size(); j++) + { + float dx = p.x - m[j].x; + float dy = p.y - m[j].y; + + if (dx * dx + dy * dy < minDistance * minDistance) + { + good = false; + goto break_out; + } + } + } + } + } + + break_out: + + if(good) + { + grid[y_cell * grid_width + x_cell].push_back(p); + + tmp2.push_back(p); + + if (maxCorners > 0 && tmp2.size() == static_cast(maxCorners)) + break; + } + } + + corners.upload(Mat(1, static_cast(tmp2.size()), CV_32FC2, &tmp2[0])); + } +} +void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, vector &points_v) +{ + CV_DbgAssert(points.type() == CV_32FC2); + points_v.resize(points.cols); + openCLSafeCall(clEnqueueReadBuffer( + *reinterpret_cast(getoclCommandQueue()), + reinterpret_cast(points.data), + CL_TRUE, + 0, + points.cols * sizeof(Point2f), + &points_v[0], + 0, + NULL, + NULL)); +} + + diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp index ee1e92a71..83643d290 100644 --- a/modules/ocl/src/imgproc.cpp +++ b/modules/ocl/src/imgproc.cpp @@ -1207,30 +1207,41 @@ namespace cv void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int borderType) { - if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) - { - CV_Error(CV_GpuNotSupported, "select device don't support double"); - } - CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2); - oclMat Dx, Dy; - CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT); - extractCovData(src, Dx, Dy, blockSize, ksize, borderType); - dst.create(src.size(), CV_32F); - corner_ocl(imgproc_calcHarris, "calcHarris", blockSize, static_cast(k), Dx, Dy, dst, borderType); + oclMat dx, dy; + cornerHarris_dxdy(src, dst, dx, dy, blockSize, ksize, k, borderType); } - void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType) + void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize, + double k, int borderType) { if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "select device don't support double"); } CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2); - oclMat Dx, Dy; CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT); - extractCovData(src, Dx, Dy, blockSize, ksize, borderType); + extractCovData(src, dx, dy, blockSize, ksize, borderType); dst.create(src.size(), CV_32F); - corner_ocl(imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, Dx, Dy, dst, borderType); + corner_ocl(imgproc_calcHarris, "calcHarris", blockSize, static_cast(k), dx, dy, dst, borderType); + } + + void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType) + { + oclMat dx, dy; + cornerMinEigenVal_dxdy(src, dst, dx, dy, blockSize, ksize, borderType); + } + + void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize, int borderType) + { + if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + { + CV_Error(CV_GpuNotSupported, "select device don't support double"); + } + CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2); + CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT); + extractCovData(src, dx, dy, blockSize, ksize, borderType); + dst.create(src.size(), CV_32F); + corner_ocl(imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, dx, dy, dst, borderType); } /////////////////////////////////// MeanShiftfiltering /////////////////////////////////////////////// static void meanShiftFiltering_gpu(const oclMat &src, oclMat dst, int sp, int sr, int maxIter, float eps) diff --git a/modules/ocl/src/mcwutil.cpp b/modules/ocl/src/mcwutil.cpp index 3bcb8700b..b1f8eebf6 100644 --- a/modules/ocl/src/mcwutil.cpp +++ b/modules/ocl/src/mcwutil.cpp @@ -156,7 +156,7 @@ namespace cv format.image_channel_order = CL_RGBA; break; default: - CV_Error(-1, "Image forma is not supported"); + CV_Error(-1, "Image format is not supported"); break; } #ifdef CL_VERSION_1_2 @@ -225,6 +225,11 @@ namespace cv openCLSafeCall(err); return texture; } + Ptr bindTexturePtr(const oclMat &mat) + { + return Ptr(new TextureCL(bindTexture(mat), mat.rows, mat.cols, mat.type())); + } + void releaseTexture(cl_mem& texture) { openCLFree(texture); diff --git a/modules/ocl/src/opencl/imgproc_gfft.cl b/modules/ocl/src/opencl/imgproc_gfft.cl new file mode 100644 index 000000000..5fa27ffc1 --- /dev/null +++ b/modules/ocl/src/opencl/imgproc_gfft.cl @@ -0,0 +1,276 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@outlook.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef WITH_MASK +#define WITH_MASK 0 +#endif + +__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; + +inline float ELEM_INT2(image2d_t _eig, int _x, int _y) +{ + return read_imagef(_eig, sampler, (int2)(_x, _y)).x; +} + +inline float ELEM_FLT2(image2d_t _eig, float2 pt) +{ + return read_imagef(_eig, sampler, pt).x; +} + +__kernel + void findCorners + ( + image2d_t eig, + __global const char * mask, + __global float2 * corners, + const int mask_strip,// in pixels + const float threshold, + const int rows, + const int cols, + const int max_count, + __global int * g_counter + ) +{ + const int j = get_global_id(0); + const int i = get_global_id(1); + + if (i > 0 && i < rows - 1 && j > 0 && j < cols - 1 +#if WITH_MASK + && mask[i * mask_strip + j] != 0 +#endif + ) + { + const float val = ELEM_INT2(eig, j, i); + + if (val > threshold) + { + float maxVal = val; + + maxVal = fmax(ELEM_INT2(eig, j - 1, i - 1), maxVal); + maxVal = fmax(ELEM_INT2(eig, j , i - 1), maxVal); + maxVal = fmax(ELEM_INT2(eig, j + 1, i - 1), maxVal); + + maxVal = fmax(ELEM_INT2(eig, j - 1, i), maxVal); + maxVal = fmax(ELEM_INT2(eig, j + 1, i), maxVal); + + maxVal = fmax(ELEM_INT2(eig, j - 1, i + 1), maxVal); + maxVal = fmax(ELEM_INT2(eig, j , i + 1), maxVal); + maxVal = fmax(ELEM_INT2(eig, j + 1, i + 1), maxVal); + + if (val == maxVal) + { + const int ind = atomic_inc(g_counter); + + if (ind < max_count) + corners[ind] = (float2)(j, i); + } + } + } +} + +//bitonic sort +__kernel + void sortCorners_bitonicSort + ( + image2d_t eig, + __global float2 * corners, + const int count, + const int stage, + const int passOfStage + ) +{ + const int threadId = get_global_id(0); + if(threadId >= count / 2) + { + return; + } + + const int sortOrder = (((threadId/(1 << stage)) % 2)) == 1 ? 1 : 0; // 0 is descent + + const int pairDistance = 1 << (stage - passOfStage); + const int blockWidth = 2 * pairDistance; + + const int leftId = min( (threadId % pairDistance) + + (threadId / pairDistance) * blockWidth, count ); + + const int rightId = min( leftId + pairDistance, count ); + + const float2 leftPt = corners[leftId]; + const float2 rightPt = corners[rightId]; + + const float leftVal = ELEM_FLT2(eig, leftPt); + const float rightVal = ELEM_FLT2(eig, rightPt); + + const bool compareResult = leftVal > rightVal; + + float2 greater = compareResult ? leftPt:rightPt; + float2 lesser = compareResult ? rightPt:leftPt; + + corners[leftId] = sortOrder ? lesser : greater; + corners[rightId] = sortOrder ? greater : lesser; +} + +//selection sort for gfft +//kernel is ported from Bolt library: +//https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_kernels.cl +// Local sort will firstly sort elements of each workgroup using selection sort +// its performance is O(n) +__kernel + void sortCorners_selectionSortLocal + ( + image2d_t eig, + __global float2 * corners, + const int count, + __local float2 * scratch + ) +{ + int i = get_local_id(0); // index in workgroup + int numOfGroups = get_num_groups(0); // index in workgroup + int groupID = get_group_id(0); + int wg = get_local_size(0); // workgroup size = block size + int n; // number of elements to be processed for this work group + + int offset = groupID * wg; + int same = 0; + corners += offset; + n = (groupID == (numOfGroups-1))? (count - wg*(numOfGroups-1)) : wg; + float2 pt1, pt2; + + pt1 = corners[min(i, n)]; + scratch[i] = pt1; + barrier(CLK_LOCAL_MEM_FENCE); + + if(i >= n) + { + return; + } + + float val1 = ELEM_FLT2(eig, pt1); + float val2; + + int pos = 0; + for (int j=0;j val1) + pos++;//calculate the rank of this element in this work group + else + { + if(val1 > val2) + continue; + else + { + // val1 and val2 are same + same++; + } + } + } + for (int j=0; j< same; j++) + corners[pos + j] = pt1; +} +__kernel + void sortCorners_selectionSortFinal + ( + image2d_t eig, + __global float2 * corners, + const int count + ) +{ + const int i = get_local_id(0); // index in workgroup + const int numOfGroups = get_num_groups(0); // index in workgroup + const int groupID = get_group_id(0); + const int wg = get_local_size(0); // workgroup size = block size + int pos = 0, same = 0; + const int offset = get_group_id(0) * wg; + const int remainder = count - wg*(numOfGroups-1); + + if((offset + i ) >= count) + return; + float2 pt1, pt2; + pt1 = corners[groupID*wg + i]; + + float val1 = ELEM_FLT2(eig, pt1); + float val2; + + for(int j=0; j val2) + break; + else + { + //Increment only if the value is not the same. + if( val2 > val1 ) + pos++; + else + same++; + } + } + } + + for(int k=0; k val2) + break; + else + { + //Don't increment if the value is the same. + //Two elements are same if (*userComp)(jData, iData) and (*userComp)(iData, jData) are both false + if(val2 > val1) + pos++; + else + same++; + } + } + for (int j=0; j< same; j++) + corners[pos + j] = pt1; +} + diff --git a/modules/ocl/test/test_optflow.cpp b/modules/ocl/test/test_optflow.cpp index b08d33a08..0121be8f9 100644 --- a/modules/ocl/test/test_optflow.cpp +++ b/modules/ocl/test/test_optflow.cpp @@ -55,6 +55,83 @@ using namespace testing; using namespace std; extern string workdir; + + +////////////////////////////////////////////////////// +// GoodFeaturesToTrack +namespace +{ + IMPLEMENT_PARAM_CLASS(MinDistance, double) +} +PARAM_TEST_CASE(GoodFeaturesToTrack, MinDistance) +{ + double minDistance; + + virtual void SetUp() + { + minDistance = GET_PARAM(0); + } +}; + +TEST_P(GoodFeaturesToTrack, Accuracy) +{ + cv::Mat frame = readImage(workdir + "../gpu/rubberwhale1.png", cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame.empty()); + + int maxCorners = 1000; + double qualityLevel = 0.01; + + cv::ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance); + + cv::ocl::oclMat d_pts; + detector(oclMat(frame), d_pts); + + ASSERT_FALSE(d_pts.empty()); + + std::vector pts(d_pts.cols); + + detector.downloadPoints(d_pts, pts); + + std::vector pts_gold; + cv::goodFeaturesToTrack(frame, pts_gold, maxCorners, qualityLevel, minDistance); + + ASSERT_EQ(pts_gold.size(), pts.size()); + + size_t mistmatch = 0; + for (size_t i = 0; i < pts.size(); ++i) + { + cv::Point2i a = pts_gold[i]; + cv::Point2i b = pts[i]; + + bool eq = std::abs(a.x - b.x) < 1 && std::abs(a.y - b.y) < 1; + + if (!eq) + ++mistmatch; + } + + double bad_ratio = static_cast(mistmatch) / pts.size(); + + ASSERT_LE(bad_ratio, 0.01); +} + +TEST_P(GoodFeaturesToTrack, EmptyCorners) +{ + int maxCorners = 1000; + double qualityLevel = 0.01; + + cv::ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance); + + cv::ocl::oclMat src(100, 100, CV_8UC1, cv::Scalar::all(0)); + cv::ocl::oclMat corners(1, maxCorners, CV_32FC2); + + detector(src, corners); + + ASSERT_TRUE(corners.empty()); +} + +INSTANTIATE_TEST_CASE_P(OCL_Video, GoodFeaturesToTrack, + testing::Values(MinDistance(0.0), MinDistance(3.0))); + ////////////////////////////////////////////////////////////////////////// PARAM_TEST_CASE(TVL1, bool) {