Add ocl's good features to track implementation.
Additional notes with this commit: 1. Add cornerHarris_dxdy and cornerMinEigenVal_dxdy to get the interim dx and dy output of Sobel operator; 2. Add minMax_buf to allow user to reuse buffers in minMax; 3. Fix an error when either min or max pointer fed into minMax is NULL; 4. Corner sorter temporarily uses C++ STL's quick sort. A parallel selection sort in OpneCL is contained in the implementation but disabled due to poor performance at the moment. 5. Accuracy test for ocl gfft.
This commit is contained in:
parent
d4255b7f75
commit
b4a4a05bdc
@ -122,8 +122,9 @@ namespace cv
|
|||||||
CV_EXPORTS void setBinpath(const char *path);
|
CV_EXPORTS void setBinpath(const char *path);
|
||||||
|
|
||||||
//The two functions below enable other opencl program to use ocl module's cl_context and cl_command_queue
|
//The two functions below enable other opencl program to use ocl module's cl_context and cl_command_queue
|
||||||
|
//returns cl_context *
|
||||||
CV_EXPORTS void* getoclContext();
|
CV_EXPORTS void* getoclContext();
|
||||||
|
//returns cl_command_queue *
|
||||||
CV_EXPORTS void* getoclCommandQueue();
|
CV_EXPORTS void* getoclCommandQueue();
|
||||||
|
|
||||||
//explicit call clFinish. The global command queue will be used.
|
//explicit call clFinish. The global command queue will be used.
|
||||||
@ -461,6 +462,7 @@ namespace cv
|
|||||||
// support all C1 types
|
// support all C1 types
|
||||||
|
|
||||||
CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat());
|
CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat());
|
||||||
|
CV_EXPORTS void minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat& buf);
|
||||||
|
|
||||||
//! finds global minimum and maximum array elements and returns their values with locations
|
//! finds global minimum and maximum array elements and returns their values with locations
|
||||||
// support all C1 types
|
// support all C1 types
|
||||||
@ -789,7 +791,11 @@ namespace cv
|
|||||||
CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum);
|
CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum);
|
||||||
CV_EXPORTS void integral(const oclMat &src, oclMat &sum);
|
CV_EXPORTS void integral(const oclMat &src, oclMat &sum);
|
||||||
CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
|
CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
|
||||||
|
CV_EXPORTS void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
|
||||||
|
int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
|
||||||
CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
|
CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
|
||||||
|
CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
|
||||||
|
int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
///////////////////////////////////////////CascadeClassifier//////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////CascadeClassifier//////////////////////////////////////////////////////////////////
|
||||||
@ -1253,6 +1259,52 @@ namespace cv
|
|||||||
public:
|
public:
|
||||||
explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {}
|
explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class CV_EXPORTS GoodFeaturesToTrackDetector_OCL
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
|
||||||
|
int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
|
||||||
|
|
||||||
|
//! return 1 rows matrix with CV_32FC2 type
|
||||||
|
void operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat());
|
||||||
|
//! download points of type Point2f to a vector. the vector's content will be erased
|
||||||
|
void downloadPoints(const oclMat &points, vector<Point2f> &points_v);
|
||||||
|
|
||||||
|
int maxCorners;
|
||||||
|
double qualityLevel;
|
||||||
|
double minDistance;
|
||||||
|
|
||||||
|
int blockSize;
|
||||||
|
bool useHarrisDetector;
|
||||||
|
double harrisK;
|
||||||
|
void releaseMemory()
|
||||||
|
{
|
||||||
|
Dx_.release();
|
||||||
|
Dy_.release();
|
||||||
|
eig_.release();
|
||||||
|
minMaxbuf_.release();
|
||||||
|
tmpCorners_.release();
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
oclMat Dx_;
|
||||||
|
oclMat Dy_;
|
||||||
|
oclMat eig_;
|
||||||
|
oclMat minMaxbuf_;
|
||||||
|
oclMat tmpCorners_;
|
||||||
|
};
|
||||||
|
|
||||||
|
inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_,
|
||||||
|
int blockSize_, bool useHarrisDetector_, double harrisK_)
|
||||||
|
{
|
||||||
|
maxCorners = maxCorners_;
|
||||||
|
qualityLevel = qualityLevel_;
|
||||||
|
minDistance = minDistance_;
|
||||||
|
blockSize = blockSize_;
|
||||||
|
useHarrisDetector = useHarrisDetector_;
|
||||||
|
harrisK = harrisK_;
|
||||||
|
}
|
||||||
|
|
||||||
/////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
|
/////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
|
||||||
class CV_EXPORTS PyrLKOpticalFlow
|
class CV_EXPORTS PyrLKOpticalFlow
|
||||||
{
|
{
|
||||||
|
@ -120,6 +120,33 @@ namespace cv
|
|||||||
cl_mem CV_EXPORTS bindTexture(const oclMat &mat);
|
cl_mem CV_EXPORTS bindTexture(const oclMat &mat);
|
||||||
void CV_EXPORTS releaseTexture(cl_mem& texture);
|
void CV_EXPORTS releaseTexture(cl_mem& texture);
|
||||||
|
|
||||||
|
//Represents an image texture object
|
||||||
|
class CV_EXPORTS TextureCL
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
TextureCL(cl_mem tex, int r, int c, int t)
|
||||||
|
: tex_(tex), rows(r), cols(c), type(t) {}
|
||||||
|
~TextureCL()
|
||||||
|
{
|
||||||
|
openCLFree(tex_);
|
||||||
|
}
|
||||||
|
operator cl_mem()
|
||||||
|
{
|
||||||
|
return tex_;
|
||||||
|
}
|
||||||
|
cl_mem const tex_;
|
||||||
|
const int rows;
|
||||||
|
const int cols;
|
||||||
|
const int type;
|
||||||
|
private:
|
||||||
|
//disable assignment
|
||||||
|
void operator=(const TextureCL&);
|
||||||
|
};
|
||||||
|
// bind oclMat to OpenCL image textures and retunrs an TextureCL object
|
||||||
|
// note:
|
||||||
|
// for faster clamping, there is no buffer padding for the constructed texture
|
||||||
|
Ptr<TextureCL> CV_EXPORTS bindTexturePtr(const oclMat &mat);
|
||||||
|
|
||||||
// returns whether the current context supports image2d_t format or not
|
// returns whether the current context supports image2d_t format or not
|
||||||
bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext());
|
bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext());
|
||||||
|
|
||||||
@ -132,7 +159,7 @@ namespace cv
|
|||||||
};
|
};
|
||||||
template<DEVICE_INFO _it, typename _ty>
|
template<DEVICE_INFO _it, typename _ty>
|
||||||
_ty queryDeviceInfo(cl_kernel kernel = NULL);
|
_ty queryDeviceInfo(cl_kernel kernel = NULL);
|
||||||
//info should have been pre-allocated
|
|
||||||
template<>
|
template<>
|
||||||
int CV_EXPORTS queryDeviceInfo<WAVEFRONT_SIZE, int>(cl_kernel kernel);
|
int CV_EXPORTS queryDeviceInfo<WAVEFRONT_SIZE, int>(cl_kernel kernel);
|
||||||
template<>
|
template<>
|
||||||
|
@ -782,45 +782,55 @@ static void arithmetic_minMax_mask_run(const oclMat &src, const oclMat &mask, cl
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T> void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask)
|
template <typename T> void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal,
|
||||||
|
const oclMat &mask, oclMat &buf)
|
||||||
{
|
{
|
||||||
size_t groupnum = src.clCxt->computeUnits();
|
size_t groupnum = src.clCxt->computeUnits();
|
||||||
CV_Assert(groupnum != 0);
|
CV_Assert(groupnum != 0);
|
||||||
groupnum = groupnum * 2;
|
groupnum = groupnum * 2;
|
||||||
int vlen = 8;
|
int vlen = 8;
|
||||||
int dbsize = groupnum * 2 * vlen * sizeof(T) ;
|
int dbsize = groupnum * 2 * vlen * sizeof(T) ;
|
||||||
Context *clCxt = src.clCxt;
|
|
||||||
cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize);
|
ensureSizeIsEnough(1, dbsize, CV_8UC1, buf);
|
||||||
*minVal = std::numeric_limits<double>::max() , *maxVal = -std::numeric_limits<double>::max();
|
|
||||||
|
cl_mem buf_data = reinterpret_cast<cl_mem>(buf.data);
|
||||||
|
|
||||||
if (mask.empty())
|
if (mask.empty())
|
||||||
{
|
{
|
||||||
arithmetic_minMax_run(src, mask, dstBuffer, vlen, groupnum, "arithm_op_minMax");
|
arithmetic_minMax_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax");
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
arithmetic_minMax_mask_run(src, mask, dstBuffer, vlen, groupnum, "arithm_op_minMax_mask");
|
arithmetic_minMax_mask_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax_mask");
|
||||||
}
|
}
|
||||||
T *p = new T[groupnum * vlen * 2];
|
|
||||||
memset(p, 0, dbsize);
|
Mat matbuf = Mat(buf);
|
||||||
openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize);
|
T *p = matbuf.ptr<T>();
|
||||||
if(minVal != NULL){
|
if(minVal != NULL)
|
||||||
|
{
|
||||||
|
*minVal = std::numeric_limits<double>::max();
|
||||||
for(int i = 0; i < vlen * (int)groupnum; i++)
|
for(int i = 0; i < vlen * (int)groupnum; i++)
|
||||||
{
|
{
|
||||||
*minVal = *minVal < p[i] ? *minVal : p[i];
|
*minVal = *minVal < p[i] ? *minVal : p[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(maxVal != NULL){
|
if(maxVal != NULL)
|
||||||
|
{
|
||||||
|
*maxVal = -std::numeric_limits<double>::max();
|
||||||
for(int i = vlen * (int)groupnum; i < 2 * vlen * (int)groupnum; i++)
|
for(int i = vlen * (int)groupnum; i < 2 * vlen * (int)groupnum; i++)
|
||||||
{
|
{
|
||||||
*maxVal = *maxVal > p[i] ? *maxVal : p[i];
|
*maxVal = *maxVal > p[i] ? *maxVal : p[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
delete[] p;
|
|
||||||
openCLFree(dstBuffer);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask);
|
typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf);
|
||||||
void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask)
|
void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask)
|
||||||
|
{
|
||||||
|
oclMat buf;
|
||||||
|
minMax_buf(src, minVal, maxVal, mask, buf);
|
||||||
|
}
|
||||||
|
void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf)
|
||||||
{
|
{
|
||||||
CV_Assert(src.oclchannels() == 1);
|
CV_Assert(src.oclchannels() == 1);
|
||||||
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
|
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
|
||||||
@ -840,7 +850,7 @@ void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oc
|
|||||||
};
|
};
|
||||||
minMaxFunc func;
|
minMaxFunc func;
|
||||||
func = functab[src.depth()];
|
func = functab[src.depth()];
|
||||||
func(src, minVal, maxVal, mask);
|
func(src, minVal, maxVal, mask, buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
351
modules/ocl/src/gfft.cpp
Normal file
351
modules/ocl/src/gfft.cpp
Normal file
@ -0,0 +1,351 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Peng Xiao, pengxiao@outlook.com
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors as is and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
#include <iomanip>
|
||||||
|
#include "precomp.hpp"
|
||||||
|
|
||||||
|
using namespace cv;
|
||||||
|
using namespace cv::ocl;
|
||||||
|
|
||||||
|
static bool use_cpu_sorter = true;
|
||||||
|
|
||||||
|
namespace cv
|
||||||
|
{
|
||||||
|
namespace ocl
|
||||||
|
{
|
||||||
|
///////////////////////////OpenCL kernel strings///////////////////////////
|
||||||
|
extern const char *imgproc_gfft;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
enum SortMethod
|
||||||
|
{
|
||||||
|
CPU_STL,
|
||||||
|
BITONIC,
|
||||||
|
SELECTION
|
||||||
|
};
|
||||||
|
|
||||||
|
const int GROUP_SIZE = 256;
|
||||||
|
|
||||||
|
template<SortMethod method>
|
||||||
|
struct Sorter
|
||||||
|
{
|
||||||
|
//typedef EigType;
|
||||||
|
};
|
||||||
|
|
||||||
|
//TODO(pengx): optimize GPU sorter's performance thus CPU sorter is removed.
|
||||||
|
template<>
|
||||||
|
struct Sorter<CPU_STL>
|
||||||
|
{
|
||||||
|
typedef oclMat EigType;
|
||||||
|
static cv::Mutex cs;
|
||||||
|
static Mat mat_eig;
|
||||||
|
|
||||||
|
//prototype
|
||||||
|
static int clfloat2Gt(cl_float2 pt1, cl_float2 pt2)
|
||||||
|
{
|
||||||
|
float v1 = mat_eig.at<float>(cvRound(pt1.s[1]), cvRound(pt1.s[0]));
|
||||||
|
float v2 = mat_eig.at<float>(cvRound(pt2.s[1]), cvRound(pt2.s[0]));
|
||||||
|
return v1 > v2;
|
||||||
|
}
|
||||||
|
static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
|
||||||
|
{
|
||||||
|
cv::AutoLock lock(cs);
|
||||||
|
//temporarily use STL's sort function
|
||||||
|
Mat mat_corners = corners;
|
||||||
|
mat_eig = eig_tex;
|
||||||
|
std::sort(mat_corners.begin<cl_float2>(), mat_corners.begin<cl_float2>() + count, clfloat2Gt);
|
||||||
|
corners = mat_corners;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
cv::Mutex Sorter<CPU_STL>::cs;
|
||||||
|
cv::Mat Sorter<CPU_STL>::mat_eig;
|
||||||
|
|
||||||
|
template<>
|
||||||
|
struct Sorter<BITONIC>
|
||||||
|
{
|
||||||
|
typedef TextureCL EigType;
|
||||||
|
|
||||||
|
static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
|
||||||
|
{
|
||||||
|
Context * cxt = Context::getContext();
|
||||||
|
size_t globalThreads[3] = {count / 2, 1, 1};
|
||||||
|
size_t localThreads[3] = {GROUP_SIZE, 1, 1};
|
||||||
|
|
||||||
|
// 2^numStages should be equal to count or the output is invalid
|
||||||
|
int numStages = 0;
|
||||||
|
for(int i = count; i > 1; i >>= 1)
|
||||||
|
{
|
||||||
|
++numStages;
|
||||||
|
}
|
||||||
|
const int argc = 5;
|
||||||
|
std::vector< std::pair<size_t, const void *> > args(argc);
|
||||||
|
std::string kernelname = "sortCorners_bitonicSort";
|
||||||
|
args[0] = std::make_pair(sizeof(cl_mem), (void *)&eig_tex);
|
||||||
|
args[1] = std::make_pair(sizeof(cl_mem), (void *)&corners.data);
|
||||||
|
args[2] = std::make_pair(sizeof(cl_int), (void *)&count);
|
||||||
|
for(int stage = 0; stage < numStages; ++stage)
|
||||||
|
{
|
||||||
|
args[3] = std::make_pair(sizeof(cl_int), (void *)&stage);
|
||||||
|
for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
|
||||||
|
{
|
||||||
|
args[4] = std::make_pair(sizeof(cl_int), (void *)&passOfStage);
|
||||||
|
openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<>
|
||||||
|
struct Sorter<SELECTION>
|
||||||
|
{
|
||||||
|
typedef TextureCL EigType;
|
||||||
|
|
||||||
|
static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
|
||||||
|
{
|
||||||
|
Context * cxt = Context::getContext();
|
||||||
|
|
||||||
|
size_t globalThreads[3] = {count, 1, 1};
|
||||||
|
size_t localThreads[3] = {GROUP_SIZE, 1, 1};
|
||||||
|
|
||||||
|
std::vector< std::pair<size_t, const void *> > args;
|
||||||
|
//local
|
||||||
|
std::string kernelname = "sortCorners_selectionSortLocal";
|
||||||
|
int lds_size = GROUP_SIZE * sizeof(cl_float2);
|
||||||
|
args.push_back( std::make_pair( sizeof(cl_mem), (void*)&eig_tex) );
|
||||||
|
args.push_back( std::make_pair( sizeof(cl_mem), (void*)&corners.data) );
|
||||||
|
args.push_back( std::make_pair( sizeof(cl_int), (void*)&count) );
|
||||||
|
args.push_back( std::make_pair( lds_size, (void*)NULL) );
|
||||||
|
|
||||||
|
openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1);
|
||||||
|
|
||||||
|
//final
|
||||||
|
kernelname = "sortCorners_selectionSortFinal";
|
||||||
|
args.pop_back();
|
||||||
|
openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
int findCorners_caller(
|
||||||
|
const TextureCL& eig,
|
||||||
|
const float threshold,
|
||||||
|
const oclMat& mask,
|
||||||
|
oclMat& corners,
|
||||||
|
const int max_count)
|
||||||
|
{
|
||||||
|
std::vector<int> k;
|
||||||
|
Context * cxt = Context::getContext();
|
||||||
|
|
||||||
|
std::vector< std::pair<size_t, const void*> > args;
|
||||||
|
std::string kernelname = "findCorners";
|
||||||
|
|
||||||
|
const int mask_strip = mask.step / mask.elemSize1();
|
||||||
|
|
||||||
|
oclMat g_counter(1, 1, CV_32SC1);
|
||||||
|
g_counter.setTo(0);
|
||||||
|
|
||||||
|
args.push_back(make_pair( sizeof(cl_mem), (void*)&eig ));
|
||||||
|
args.push_back(make_pair( sizeof(cl_mem), (void*)&mask.data ));
|
||||||
|
args.push_back(make_pair( sizeof(cl_mem), (void*)&corners.data ));
|
||||||
|
args.push_back(make_pair( sizeof(cl_int), (void*)&mask_strip));
|
||||||
|
args.push_back(make_pair( sizeof(cl_float), (void*)&threshold ));
|
||||||
|
args.push_back(make_pair( sizeof(cl_int), (void*)&eig.rows ));
|
||||||
|
args.push_back(make_pair( sizeof(cl_int), (void*)&eig.cols ));
|
||||||
|
args.push_back(make_pair( sizeof(cl_int), (void*)&max_count ));
|
||||||
|
args.push_back(make_pair( sizeof(cl_mem), (void*)&g_counter.data ));
|
||||||
|
|
||||||
|
size_t globalThreads[3] = {eig.cols, eig.rows, 1};
|
||||||
|
size_t localThreads[3] = {16, 16, 1};
|
||||||
|
|
||||||
|
const char * opt = mask.empty() ? "" : "-D WITH_MASK";
|
||||||
|
openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1, opt);
|
||||||
|
return std::min(Mat(g_counter).at<int>(0), max_count);
|
||||||
|
}
|
||||||
|
}//unnamed namespace
|
||||||
|
|
||||||
|
void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask)
|
||||||
|
{
|
||||||
|
CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0);
|
||||||
|
CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()));
|
||||||
|
|
||||||
|
CV_DbgAssert(support_image2d());
|
||||||
|
|
||||||
|
ensureSizeIsEnough(image.size(), CV_32F, eig_);
|
||||||
|
|
||||||
|
if (useHarrisDetector)
|
||||||
|
cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK);
|
||||||
|
else
|
||||||
|
cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3);
|
||||||
|
|
||||||
|
double maxVal = 0;
|
||||||
|
minMax_buf(eig_, 0, &maxVal, oclMat(), minMaxbuf_);
|
||||||
|
|
||||||
|
ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_);
|
||||||
|
|
||||||
|
Ptr<TextureCL> eig_tex = bindTexturePtr(eig_);
|
||||||
|
int total = findCorners_caller(
|
||||||
|
*eig_tex,
|
||||||
|
static_cast<float>(maxVal * qualityLevel),
|
||||||
|
mask,
|
||||||
|
tmpCorners_,
|
||||||
|
tmpCorners_.cols);
|
||||||
|
|
||||||
|
if (total == 0)
|
||||||
|
{
|
||||||
|
corners.release();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if(use_cpu_sorter)
|
||||||
|
{
|
||||||
|
Sorter<CPU_STL>::sortCorners_caller(eig_, tmpCorners_, total);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
//if total is power of 2
|
||||||
|
if(((total - 1) & (total)) == 0)
|
||||||
|
{
|
||||||
|
Sorter<BITONIC>::sortCorners_caller(*eig_tex, tmpCorners_, total);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Sorter<SELECTION>::sortCorners_caller(*eig_tex, tmpCorners_, total);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (minDistance < 1)
|
||||||
|
{
|
||||||
|
corners = tmpCorners_(Rect(0, 0, maxCorners > 0 ? std::min(maxCorners, total) : total, 1));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
vector<Point2f> tmp(total);
|
||||||
|
downloadPoints(tmpCorners_, tmp);
|
||||||
|
|
||||||
|
vector<Point2f> tmp2;
|
||||||
|
tmp2.reserve(total);
|
||||||
|
|
||||||
|
const int cell_size = cvRound(minDistance);
|
||||||
|
const int grid_width = (image.cols + cell_size - 1) / cell_size;
|
||||||
|
const int grid_height = (image.rows + cell_size - 1) / cell_size;
|
||||||
|
|
||||||
|
std::vector< std::vector<Point2f> > grid(grid_width * grid_height);
|
||||||
|
|
||||||
|
for (int i = 0; i < total; ++i)
|
||||||
|
{
|
||||||
|
Point2f p = tmp[i];
|
||||||
|
|
||||||
|
bool good = true;
|
||||||
|
|
||||||
|
int x_cell = static_cast<int>(p.x / cell_size);
|
||||||
|
int y_cell = static_cast<int>(p.y / cell_size);
|
||||||
|
|
||||||
|
int x1 = x_cell - 1;
|
||||||
|
int y1 = y_cell - 1;
|
||||||
|
int x2 = x_cell + 1;
|
||||||
|
int y2 = y_cell + 1;
|
||||||
|
|
||||||
|
// boundary check
|
||||||
|
x1 = std::max(0, x1);
|
||||||
|
y1 = std::max(0, y1);
|
||||||
|
x2 = std::min(grid_width - 1, x2);
|
||||||
|
y2 = std::min(grid_height - 1, y2);
|
||||||
|
|
||||||
|
for (int yy = y1; yy <= y2; yy++)
|
||||||
|
{
|
||||||
|
for (int xx = x1; xx <= x2; xx++)
|
||||||
|
{
|
||||||
|
vector<Point2f>& m = grid[yy * grid_width + xx];
|
||||||
|
|
||||||
|
if (!m.empty())
|
||||||
|
{
|
||||||
|
for(size_t j = 0; j < m.size(); j++)
|
||||||
|
{
|
||||||
|
float dx = p.x - m[j].x;
|
||||||
|
float dy = p.y - m[j].y;
|
||||||
|
|
||||||
|
if (dx * dx + dy * dy < minDistance * minDistance)
|
||||||
|
{
|
||||||
|
good = false;
|
||||||
|
goto break_out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
break_out:
|
||||||
|
|
||||||
|
if(good)
|
||||||
|
{
|
||||||
|
grid[y_cell * grid_width + x_cell].push_back(p);
|
||||||
|
|
||||||
|
tmp2.push_back(p);
|
||||||
|
|
||||||
|
if (maxCorners > 0 && tmp2.size() == static_cast<size_t>(maxCorners))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
corners.upload(Mat(1, static_cast<int>(tmp2.size()), CV_32FC2, &tmp2[0]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, vector<Point2f> &points_v)
|
||||||
|
{
|
||||||
|
CV_DbgAssert(points.type() == CV_32FC2);
|
||||||
|
points_v.resize(points.cols);
|
||||||
|
openCLSafeCall(clEnqueueReadBuffer(
|
||||||
|
*reinterpret_cast<cl_command_queue*>(getoclCommandQueue()),
|
||||||
|
reinterpret_cast<cl_mem>(points.data),
|
||||||
|
CL_TRUE,
|
||||||
|
0,
|
||||||
|
points.cols * sizeof(Point2f),
|
||||||
|
&points_v[0],
|
||||||
|
0,
|
||||||
|
NULL,
|
||||||
|
NULL));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -1207,30 +1207,41 @@ namespace cv
|
|||||||
void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize,
|
void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize,
|
||||||
double k, int borderType)
|
double k, int borderType)
|
||||||
{
|
{
|
||||||
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
|
oclMat dx, dy;
|
||||||
{
|
cornerHarris_dxdy(src, dst, dx, dy, blockSize, ksize, k, borderType);
|
||||||
CV_Error(CV_GpuNotSupported, "select device don't support double");
|
|
||||||
}
|
|
||||||
CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2);
|
|
||||||
oclMat Dx, Dy;
|
|
||||||
CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
|
|
||||||
extractCovData(src, Dx, Dy, blockSize, ksize, borderType);
|
|
||||||
dst.create(src.size(), CV_32F);
|
|
||||||
corner_ocl(imgproc_calcHarris, "calcHarris", blockSize, static_cast<float>(k), Dx, Dy, dst, borderType);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType)
|
void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize,
|
||||||
|
double k, int borderType)
|
||||||
{
|
{
|
||||||
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
|
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported, "select device don't support double");
|
CV_Error(CV_GpuNotSupported, "select device don't support double");
|
||||||
}
|
}
|
||||||
CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2);
|
CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2);
|
||||||
oclMat Dx, Dy;
|
|
||||||
CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
|
CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
|
||||||
extractCovData(src, Dx, Dy, blockSize, ksize, borderType);
|
extractCovData(src, dx, dy, blockSize, ksize, borderType);
|
||||||
dst.create(src.size(), CV_32F);
|
dst.create(src.size(), CV_32F);
|
||||||
corner_ocl(imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, Dx, Dy, dst, borderType);
|
corner_ocl(imgproc_calcHarris, "calcHarris", blockSize, static_cast<float>(k), dx, dy, dst, borderType);
|
||||||
|
}
|
||||||
|
|
||||||
|
void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType)
|
||||||
|
{
|
||||||
|
oclMat dx, dy;
|
||||||
|
cornerMinEigenVal_dxdy(src, dst, dx, dy, blockSize, ksize, borderType);
|
||||||
|
}
|
||||||
|
|
||||||
|
void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize, int borderType)
|
||||||
|
{
|
||||||
|
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
|
||||||
|
{
|
||||||
|
CV_Error(CV_GpuNotSupported, "select device don't support double");
|
||||||
|
}
|
||||||
|
CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2);
|
||||||
|
CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
|
||||||
|
extractCovData(src, dx, dy, blockSize, ksize, borderType);
|
||||||
|
dst.create(src.size(), CV_32F);
|
||||||
|
corner_ocl(imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, dx, dy, dst, borderType);
|
||||||
}
|
}
|
||||||
/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
|
/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
|
||||||
static void meanShiftFiltering_gpu(const oclMat &src, oclMat dst, int sp, int sr, int maxIter, float eps)
|
static void meanShiftFiltering_gpu(const oclMat &src, oclMat dst, int sp, int sr, int maxIter, float eps)
|
||||||
|
@ -156,7 +156,7 @@ namespace cv
|
|||||||
format.image_channel_order = CL_RGBA;
|
format.image_channel_order = CL_RGBA;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
CV_Error(-1, "Image forma is not supported");
|
CV_Error(-1, "Image format is not supported");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#ifdef CL_VERSION_1_2
|
#ifdef CL_VERSION_1_2
|
||||||
@ -225,6 +225,11 @@ namespace cv
|
|||||||
openCLSafeCall(err);
|
openCLSafeCall(err);
|
||||||
return texture;
|
return texture;
|
||||||
}
|
}
|
||||||
|
Ptr<TextureCL> bindTexturePtr(const oclMat &mat)
|
||||||
|
{
|
||||||
|
return Ptr<TextureCL>(new TextureCL(bindTexture(mat), mat.rows, mat.cols, mat.type()));
|
||||||
|
}
|
||||||
|
|
||||||
void releaseTexture(cl_mem& texture)
|
void releaseTexture(cl_mem& texture)
|
||||||
{
|
{
|
||||||
openCLFree(texture);
|
openCLFree(texture);
|
||||||
|
276
modules/ocl/src/opencl/imgproc_gfft.cl
Normal file
276
modules/ocl/src/opencl/imgproc_gfft.cl
Normal file
@ -0,0 +1,276 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Peng Xiao, pengxiao@outlook.com
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors as is and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef WITH_MASK
|
||||||
|
#define WITH_MASK 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
|
||||||
|
|
||||||
|
inline float ELEM_INT2(image2d_t _eig, int _x, int _y)
|
||||||
|
{
|
||||||
|
return read_imagef(_eig, sampler, (int2)(_x, _y)).x;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline float ELEM_FLT2(image2d_t _eig, float2 pt)
|
||||||
|
{
|
||||||
|
return read_imagef(_eig, sampler, pt).x;
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel
|
||||||
|
void findCorners
|
||||||
|
(
|
||||||
|
image2d_t eig,
|
||||||
|
__global const char * mask,
|
||||||
|
__global float2 * corners,
|
||||||
|
const int mask_strip,// in pixels
|
||||||
|
const float threshold,
|
||||||
|
const int rows,
|
||||||
|
const int cols,
|
||||||
|
const int max_count,
|
||||||
|
__global int * g_counter
|
||||||
|
)
|
||||||
|
{
|
||||||
|
const int j = get_global_id(0);
|
||||||
|
const int i = get_global_id(1);
|
||||||
|
|
||||||
|
if (i > 0 && i < rows - 1 && j > 0 && j < cols - 1
|
||||||
|
#if WITH_MASK
|
||||||
|
&& mask[i * mask_strip + j] != 0
|
||||||
|
#endif
|
||||||
|
)
|
||||||
|
{
|
||||||
|
const float val = ELEM_INT2(eig, j, i);
|
||||||
|
|
||||||
|
if (val > threshold)
|
||||||
|
{
|
||||||
|
float maxVal = val;
|
||||||
|
|
||||||
|
maxVal = fmax(ELEM_INT2(eig, j - 1, i - 1), maxVal);
|
||||||
|
maxVal = fmax(ELEM_INT2(eig, j , i - 1), maxVal);
|
||||||
|
maxVal = fmax(ELEM_INT2(eig, j + 1, i - 1), maxVal);
|
||||||
|
|
||||||
|
maxVal = fmax(ELEM_INT2(eig, j - 1, i), maxVal);
|
||||||
|
maxVal = fmax(ELEM_INT2(eig, j + 1, i), maxVal);
|
||||||
|
|
||||||
|
maxVal = fmax(ELEM_INT2(eig, j - 1, i + 1), maxVal);
|
||||||
|
maxVal = fmax(ELEM_INT2(eig, j , i + 1), maxVal);
|
||||||
|
maxVal = fmax(ELEM_INT2(eig, j + 1, i + 1), maxVal);
|
||||||
|
|
||||||
|
if (val == maxVal)
|
||||||
|
{
|
||||||
|
const int ind = atomic_inc(g_counter);
|
||||||
|
|
||||||
|
if (ind < max_count)
|
||||||
|
corners[ind] = (float2)(j, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//bitonic sort
|
||||||
|
__kernel
|
||||||
|
void sortCorners_bitonicSort
|
||||||
|
(
|
||||||
|
image2d_t eig,
|
||||||
|
__global float2 * corners,
|
||||||
|
const int count,
|
||||||
|
const int stage,
|
||||||
|
const int passOfStage
|
||||||
|
)
|
||||||
|
{
|
||||||
|
const int threadId = get_global_id(0);
|
||||||
|
if(threadId >= count / 2)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int sortOrder = (((threadId/(1 << stage)) % 2)) == 1 ? 1 : 0; // 0 is descent
|
||||||
|
|
||||||
|
const int pairDistance = 1 << (stage - passOfStage);
|
||||||
|
const int blockWidth = 2 * pairDistance;
|
||||||
|
|
||||||
|
const int leftId = min( (threadId % pairDistance)
|
||||||
|
+ (threadId / pairDistance) * blockWidth, count );
|
||||||
|
|
||||||
|
const int rightId = min( leftId + pairDistance, count );
|
||||||
|
|
||||||
|
const float2 leftPt = corners[leftId];
|
||||||
|
const float2 rightPt = corners[rightId];
|
||||||
|
|
||||||
|
const float leftVal = ELEM_FLT2(eig, leftPt);
|
||||||
|
const float rightVal = ELEM_FLT2(eig, rightPt);
|
||||||
|
|
||||||
|
const bool compareResult = leftVal > rightVal;
|
||||||
|
|
||||||
|
float2 greater = compareResult ? leftPt:rightPt;
|
||||||
|
float2 lesser = compareResult ? rightPt:leftPt;
|
||||||
|
|
||||||
|
corners[leftId] = sortOrder ? lesser : greater;
|
||||||
|
corners[rightId] = sortOrder ? greater : lesser;
|
||||||
|
}
|
||||||
|
|
||||||
|
//selection sort for gfft
|
||||||
|
//kernel is ported from Bolt library:
|
||||||
|
//https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_kernels.cl
|
||||||
|
// Local sort will firstly sort elements of each workgroup using selection sort
|
||||||
|
// its performance is O(n)
|
||||||
|
__kernel
|
||||||
|
void sortCorners_selectionSortLocal
|
||||||
|
(
|
||||||
|
image2d_t eig,
|
||||||
|
__global float2 * corners,
|
||||||
|
const int count,
|
||||||
|
__local float2 * scratch
|
||||||
|
)
|
||||||
|
{
|
||||||
|
int i = get_local_id(0); // index in workgroup
|
||||||
|
int numOfGroups = get_num_groups(0); // index in workgroup
|
||||||
|
int groupID = get_group_id(0);
|
||||||
|
int wg = get_local_size(0); // workgroup size = block size
|
||||||
|
int n; // number of elements to be processed for this work group
|
||||||
|
|
||||||
|
int offset = groupID * wg;
|
||||||
|
int same = 0;
|
||||||
|
corners += offset;
|
||||||
|
n = (groupID == (numOfGroups-1))? (count - wg*(numOfGroups-1)) : wg;
|
||||||
|
float2 pt1, pt2;
|
||||||
|
|
||||||
|
pt1 = corners[min(i, n)];
|
||||||
|
scratch[i] = pt1;
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
if(i >= n)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
float val1 = ELEM_FLT2(eig, pt1);
|
||||||
|
float val2;
|
||||||
|
|
||||||
|
int pos = 0;
|
||||||
|
for (int j=0;j<n;++j)
|
||||||
|
{
|
||||||
|
pt2 = scratch[j];
|
||||||
|
val2 = ELEM_FLT2(eig, pt2);
|
||||||
|
if(val2 > val1)
|
||||||
|
pos++;//calculate the rank of this element in this work group
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if(val1 > val2)
|
||||||
|
continue;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// val1 and val2 are same
|
||||||
|
same++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (int j=0; j< same; j++)
|
||||||
|
corners[pos + j] = pt1;
|
||||||
|
}
|
||||||
|
__kernel
|
||||||
|
void sortCorners_selectionSortFinal
|
||||||
|
(
|
||||||
|
image2d_t eig,
|
||||||
|
__global float2 * corners,
|
||||||
|
const int count
|
||||||
|
)
|
||||||
|
{
|
||||||
|
const int i = get_local_id(0); // index in workgroup
|
||||||
|
const int numOfGroups = get_num_groups(0); // index in workgroup
|
||||||
|
const int groupID = get_group_id(0);
|
||||||
|
const int wg = get_local_size(0); // workgroup size = block size
|
||||||
|
int pos = 0, same = 0;
|
||||||
|
const int offset = get_group_id(0) * wg;
|
||||||
|
const int remainder = count - wg*(numOfGroups-1);
|
||||||
|
|
||||||
|
if((offset + i ) >= count)
|
||||||
|
return;
|
||||||
|
float2 pt1, pt2;
|
||||||
|
pt1 = corners[groupID*wg + i];
|
||||||
|
|
||||||
|
float val1 = ELEM_FLT2(eig, pt1);
|
||||||
|
float val2;
|
||||||
|
|
||||||
|
for(int j=0; j<numOfGroups-1; j++ )
|
||||||
|
{
|
||||||
|
for(int k=0; k<wg; k++)
|
||||||
|
{
|
||||||
|
pt2 = corners[j*wg + k];
|
||||||
|
val2 = ELEM_FLT2(eig, pt2);
|
||||||
|
if(val1 > val2)
|
||||||
|
break;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
//Increment only if the value is not the same.
|
||||||
|
if( val2 > val1 )
|
||||||
|
pos++;
|
||||||
|
else
|
||||||
|
same++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int k=0; k<remainder; k++)
|
||||||
|
{
|
||||||
|
pt2 = corners[(numOfGroups-1)*wg + k];
|
||||||
|
val2 = ELEM_FLT2(eig, pt2);
|
||||||
|
if(val1 > val2)
|
||||||
|
break;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
//Don't increment if the value is the same.
|
||||||
|
//Two elements are same if (*userComp)(jData, iData) and (*userComp)(iData, jData) are both false
|
||||||
|
if(val2 > val1)
|
||||||
|
pos++;
|
||||||
|
else
|
||||||
|
same++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (int j=0; j< same; j++)
|
||||||
|
corners[pos + j] = pt1;
|
||||||
|
}
|
||||||
|
|
@ -55,6 +55,83 @@ using namespace testing;
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
extern string workdir;
|
extern string workdir;
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////
|
||||||
|
// GoodFeaturesToTrack
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
IMPLEMENT_PARAM_CLASS(MinDistance, double)
|
||||||
|
}
|
||||||
|
PARAM_TEST_CASE(GoodFeaturesToTrack, MinDistance)
|
||||||
|
{
|
||||||
|
double minDistance;
|
||||||
|
|
||||||
|
virtual void SetUp()
|
||||||
|
{
|
||||||
|
minDistance = GET_PARAM(0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST_P(GoodFeaturesToTrack, Accuracy)
|
||||||
|
{
|
||||||
|
cv::Mat frame = readImage(workdir + "../gpu/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
|
||||||
|
ASSERT_FALSE(frame.empty());
|
||||||
|
|
||||||
|
int maxCorners = 1000;
|
||||||
|
double qualityLevel = 0.01;
|
||||||
|
|
||||||
|
cv::ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance);
|
||||||
|
|
||||||
|
cv::ocl::oclMat d_pts;
|
||||||
|
detector(oclMat(frame), d_pts);
|
||||||
|
|
||||||
|
ASSERT_FALSE(d_pts.empty());
|
||||||
|
|
||||||
|
std::vector<cv::Point2f> pts(d_pts.cols);
|
||||||
|
|
||||||
|
detector.downloadPoints(d_pts, pts);
|
||||||
|
|
||||||
|
std::vector<cv::Point2f> pts_gold;
|
||||||
|
cv::goodFeaturesToTrack(frame, pts_gold, maxCorners, qualityLevel, minDistance);
|
||||||
|
|
||||||
|
ASSERT_EQ(pts_gold.size(), pts.size());
|
||||||
|
|
||||||
|
size_t mistmatch = 0;
|
||||||
|
for (size_t i = 0; i < pts.size(); ++i)
|
||||||
|
{
|
||||||
|
cv::Point2i a = pts_gold[i];
|
||||||
|
cv::Point2i b = pts[i];
|
||||||
|
|
||||||
|
bool eq = std::abs(a.x - b.x) < 1 && std::abs(a.y - b.y) < 1;
|
||||||
|
|
||||||
|
if (!eq)
|
||||||
|
++mistmatch;
|
||||||
|
}
|
||||||
|
|
||||||
|
double bad_ratio = static_cast<double>(mistmatch) / pts.size();
|
||||||
|
|
||||||
|
ASSERT_LE(bad_ratio, 0.01);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_P(GoodFeaturesToTrack, EmptyCorners)
|
||||||
|
{
|
||||||
|
int maxCorners = 1000;
|
||||||
|
double qualityLevel = 0.01;
|
||||||
|
|
||||||
|
cv::ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance);
|
||||||
|
|
||||||
|
cv::ocl::oclMat src(100, 100, CV_8UC1, cv::Scalar::all(0));
|
||||||
|
cv::ocl::oclMat corners(1, maxCorners, CV_32FC2);
|
||||||
|
|
||||||
|
detector(src, corners);
|
||||||
|
|
||||||
|
ASSERT_TRUE(corners.empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(OCL_Video, GoodFeaturesToTrack,
|
||||||
|
testing::Values(MinDistance(0.0), MinDistance(3.0)));
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
PARAM_TEST_CASE(TVL1, bool)
|
PARAM_TEST_CASE(TVL1, bool)
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user