update some of the functions in ocl module to the latest version
This commit is contained in:
@@ -59,11 +59,11 @@ void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& e
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace ocl
|
||||
{
|
||||
///////////////////////////OpenCL kernel strings///////////////////////////
|
||||
extern const char *imgproc_canny;
|
||||
}
|
||||
namespace ocl
|
||||
{
|
||||
///////////////////////////OpenCL kernel strings///////////////////////////
|
||||
extern const char *imgproc_canny;
|
||||
}
|
||||
}
|
||||
|
||||
cv::ocl::CannyBuf::CannyBuf(const oclMat& dx_, const oclMat& dy_) : dx(dx_), dy(dy_)
|
||||
@@ -75,32 +75,35 @@ cv::ocl::CannyBuf::CannyBuf(const oclMat& dx_, const oclMat& dy_) : dx(dx_), dy(
|
||||
|
||||
void cv::ocl::CannyBuf::create(const Size& image_size, int apperture_size)
|
||||
{
|
||||
dx.create(image_size, CV_32SC1);
|
||||
dy.create(image_size, CV_32SC1);
|
||||
dx.create(image_size, CV_32SC1);
|
||||
dy.create(image_size, CV_32SC1);
|
||||
|
||||
if(apperture_size == 3)
|
||||
{
|
||||
dx_buf.create(image_size, CV_32SC1);
|
||||
dy_buf.create(image_size, CV_32SC1);
|
||||
}
|
||||
else if(apperture_size > 0)
|
||||
if(apperture_size == 3)
|
||||
{
|
||||
Mat kx, ky;
|
||||
if (!filterDX)
|
||||
{
|
||||
filterDX = createDerivFilter_GPU(CV_32F, CV_32F, 1, 0, apperture_size, BORDER_REPLICATE);
|
||||
}
|
||||
if (!filterDY)
|
||||
{
|
||||
filterDY = createDerivFilter_GPU(CV_32F, CV_32F, 0, 1, apperture_size, BORDER_REPLICATE);
|
||||
}
|
||||
dx_buf.create(image_size, CV_32SC1);
|
||||
dy_buf.create(image_size, CV_32SC1);
|
||||
}
|
||||
edgeBuf.create(image_size.height + 2, image_size.width + 2, CV_32FC1);
|
||||
|
||||
trackBuf1.create(1, image_size.width * image_size.height, CV_16UC2);
|
||||
trackBuf2.create(1, image_size.width * image_size.height, CV_16UC2);
|
||||
else if(apperture_size > 0)
|
||||
{
|
||||
Mat kx, ky;
|
||||
if (!filterDX)
|
||||
{
|
||||
filterDX = createDerivFilter_GPU(CV_8U, CV_32S, 1, 0, apperture_size, BORDER_REPLICATE);
|
||||
}
|
||||
if (!filterDY)
|
||||
{
|
||||
filterDY = createDerivFilter_GPU(CV_8U, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE);
|
||||
}
|
||||
}
|
||||
edgeBuf.create(image_size.height + 2, image_size.width + 2, CV_32FC1);
|
||||
|
||||
counter.create(1,1, CV_32SC1);
|
||||
trackBuf1.create(1, image_size.width * image_size.height, CV_16UC2);
|
||||
trackBuf2.create(1, image_size.width * image_size.height, CV_16UC2);
|
||||
|
||||
float counter_f [1] = { 0 };
|
||||
int err = 0;
|
||||
counter = clCreateBuffer( Context::getContext()->impl->clContext, CL_MEM_COPY_HOST_PTR, sizeof(float), counter_f, &err );
|
||||
openCLSafeCall(err);
|
||||
}
|
||||
|
||||
void cv::ocl::CannyBuf::release()
|
||||
@@ -112,12 +115,12 @@ void cv::ocl::CannyBuf::release()
|
||||
edgeBuf.release();
|
||||
trackBuf1.release();
|
||||
trackBuf2.release();
|
||||
counter.release();
|
||||
openCLFree(counter);
|
||||
}
|
||||
|
||||
namespace cv { namespace ocl {
|
||||
namespace canny
|
||||
{
|
||||
namespace canny
|
||||
{
|
||||
void calcSobelRowPass_gpu(const oclMat& src, oclMat& dx_buf, oclMat& dy_buf, int rows, int cols);
|
||||
|
||||
void calcMagnitude_gpu(const oclMat& dx_buf, const oclMat& dy_buf, oclMat& dx, oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad);
|
||||
@@ -125,12 +128,12 @@ namespace cv { namespace ocl {
|
||||
|
||||
void calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int rows, int cols, float low_thresh, float high_thresh);
|
||||
|
||||
void edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, oclMat& counter, int rows, int cols);
|
||||
void edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, void * counter, int rows, int cols);
|
||||
|
||||
void edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, oclMat& counter, int rows, int cols);
|
||||
void edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, void * counter, int rows, int cols);
|
||||
|
||||
void getEdges_gpu(oclMat& map, oclMat& dst, int rows, int cols);
|
||||
}
|
||||
}
|
||||
}}// cv::ocl
|
||||
|
||||
namespace
|
||||
@@ -164,11 +167,10 @@ void cv::ocl::Canny(const oclMat& src, CannyBuf& buf, oclMat& dst, double low_th
|
||||
std::swap( low_thresh, high_thresh );
|
||||
|
||||
dst.create(src.size(), CV_8U);
|
||||
dst.setTo(Scalar::all(0));
|
||||
//dst.setTo(Scalar::all(0));
|
||||
|
||||
buf.create(src.size(), apperture_size);
|
||||
buf.edgeBuf.setTo(Scalar::all(0));
|
||||
buf.counter.setTo(Scalar::all(0));
|
||||
//buf.edgeBuf.setTo(Scalar::all(0));
|
||||
|
||||
if (apperture_size == 3)
|
||||
{
|
||||
@@ -178,17 +180,8 @@ void cv::ocl::Canny(const oclMat& src, CannyBuf& buf, oclMat& dst, double low_th
|
||||
}
|
||||
else
|
||||
{
|
||||
// FIXME:
|
||||
// current ocl implementation requires the src and dst having same type
|
||||
// convertTo is time consuming so this may be optimized later.
|
||||
oclMat src_omat32f = src;
|
||||
src.convertTo(src_omat32f, CV_32F); // FIXME
|
||||
|
||||
buf.filterDX->apply(src_omat32f, buf.dx);
|
||||
buf.filterDY->apply(src_omat32f, buf.dy);
|
||||
|
||||
buf.dx.convertTo(buf.dx, CV_32S); // FIXME
|
||||
buf.dy.convertTo(buf.dy, CV_32S); // FIXME
|
||||
buf.filterDX->apply(src, buf.dx);
|
||||
buf.filterDY->apply(src, buf.dy);
|
||||
|
||||
calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);
|
||||
}
|
||||
@@ -210,12 +203,11 @@ void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& d
|
||||
std::swap( low_thresh, high_thresh);
|
||||
|
||||
dst.create(dx.size(), CV_8U);
|
||||
dst.setTo(Scalar::all(0));
|
||||
//dst.setTo(Scalar::all(0));
|
||||
|
||||
buf.dx = dx; buf.dy = dy;
|
||||
buf.create(dx.size(), -1);
|
||||
buf.edgeBuf.setTo(Scalar::all(0));
|
||||
buf.counter.setTo(Scalar::all(0));
|
||||
//buf.edgeBuf.setTo(Scalar::all(0));
|
||||
calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient);
|
||||
|
||||
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
|
||||
@@ -223,197 +215,197 @@ void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& d
|
||||
|
||||
void canny::calcSobelRowPass_gpu(const oclMat& src, oclMat& dx_buf, oclMat& dy_buf, int rows, int cols)
|
||||
{
|
||||
Context *clCxt = src.clCxt;
|
||||
string kernelName = "calcSobelRowPass";
|
||||
vector< pair<size_t, const void *> > args;
|
||||
Context *clCxt = src.clCxt;
|
||||
string kernelName = "calcSobelRowPass";
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dx_buf.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dy_buf.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.offset));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dx_buf.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dy_buf.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.offset));
|
||||
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
void canny::calcMagnitude_gpu(const oclMat& dx_buf, const oclMat& dy_buf, oclMat& dx, oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad)
|
||||
{
|
||||
Context *clCxt = dx_buf.clCxt;
|
||||
string kernelName = "calcMagnitude_buf";
|
||||
vector< pair<size_t, const void *> > args;
|
||||
Context *clCxt = dx_buf.clCxt;
|
||||
string kernelName = "calcMagnitude_buf";
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dx_buf.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dy_buf.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dx.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dy.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&mag.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&mag.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&mag.offset));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dx_buf.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dy_buf.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dx.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dy.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&mag.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&mag.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&mag.offset));
|
||||
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
|
||||
char build_options [15] = "";
|
||||
if(L2Grad)
|
||||
{
|
||||
strcat(build_options, "-D L2GRAD");
|
||||
}
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
|
||||
char build_options [15] = "";
|
||||
if(L2Grad)
|
||||
{
|
||||
strcat(build_options, "-D L2GRAD");
|
||||
}
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
|
||||
}
|
||||
void canny::calcMagnitude_gpu(const oclMat& dx, const oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad)
|
||||
{
|
||||
Context *clCxt = dx.clCxt;
|
||||
string kernelName = "calcMagnitude";
|
||||
vector< pair<size_t, const void *> > args;
|
||||
Context *clCxt = dx.clCxt;
|
||||
string kernelName = "calcMagnitude";
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dx.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dy.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&mag.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&mag.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&mag.offset));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dx.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dy.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&mag.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&mag.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&mag.offset));
|
||||
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
|
||||
char build_options [15] = "";
|
||||
if(L2Grad)
|
||||
{
|
||||
strcat(build_options, "-D L2GRAD");
|
||||
}
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
|
||||
char build_options [15] = "";
|
||||
if(L2Grad)
|
||||
{
|
||||
strcat(build_options, "-D L2GRAD");
|
||||
}
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
|
||||
}
|
||||
|
||||
void canny::calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int rows, int cols, float low_thresh, float high_thresh)
|
||||
{
|
||||
Context *clCxt = dx.clCxt;
|
||||
|
||||
vector< pair<size_t, const void *> > args;
|
||||
Context *clCxt = dx.clCxt;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dx.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dy.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&mag.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
|
||||
args.push_back( make_pair( sizeof(cl_float), (void *)&low_thresh));
|
||||
args.push_back( make_pair( sizeof(cl_float), (void *)&high_thresh));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&mag.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&mag.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dx.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dy.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&mag.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
|
||||
args.push_back( make_pair( sizeof(cl_float), (void *)&low_thresh));
|
||||
args.push_back( make_pair( sizeof(cl_float), (void *)&high_thresh));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&mag.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&mag.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
|
||||
|
||||
#if CALCMAP_FIXED
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
string kernelName = "calcMap";
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
string kernelName = "calcMap";
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
#else
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
string kernelName = "calcMap_2";
|
||||
size_t localThreads[3] = {256, 1, 1};
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
string kernelName = "calcMap_2";
|
||||
size_t localThreads[3] = {256, 1, 1};
|
||||
#endif
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
void canny::edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, oclMat& counter, int rows, int cols)
|
||||
void canny::edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, void * counter, int rows, int cols)
|
||||
{
|
||||
Context *clCxt = map.clCxt;
|
||||
string kernelName = "edgesHysteresisLocal";
|
||||
vector< pair<size_t, const void *> > args;
|
||||
Context *clCxt = map.clCxt;
|
||||
string kernelName = "edgesHysteresisLocal";
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&counter.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&counter));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
|
||||
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, oclMat& counter, int rows, int cols)
|
||||
void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, void * counter, int rows, int cols)
|
||||
{
|
||||
unsigned int count = Mat(counter).at<unsigned int>(0);
|
||||
|
||||
Context *clCxt = map.clCxt;
|
||||
string kernelName = "edgesHysteresisGlobal";
|
||||
vector< pair<size_t, const void *> > args;
|
||||
size_t localThreads[3] = {128, 1, 1};
|
||||
unsigned int count;
|
||||
openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(float), &count, NULL, NULL, NULL));
|
||||
Context *clCxt = map.clCxt;
|
||||
string kernelName = "edgesHysteresisGlobal";
|
||||
vector< pair<size_t, const void *> > args;
|
||||
size_t localThreads[3] = {128, 1, 1};
|
||||
|
||||
#define DIVUP(a, b) ((a)+(b)-1)/(b)
|
||||
|
||||
while(count > 0)
|
||||
{
|
||||
counter.setTo(0);
|
||||
args.clear();
|
||||
size_t globalThreads[3] = {std::min(count, 65535u) * 128, DIVUP(count, 65535), 1};
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&st2.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&counter.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&count));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
|
||||
while(count > 0)
|
||||
{
|
||||
//counter.setTo(0);
|
||||
args.clear();
|
||||
size_t globalThreads[3] = {std::min(count, 65535u) * 128, DIVUP(count, 65535), 1};
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&st2.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&counter));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&count));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
|
||||
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
count = Mat(counter).at<unsigned int>(0);
|
||||
std::swap(st1, st2);
|
||||
}
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(float), &count, NULL, NULL, NULL));
|
||||
std::swap(st1, st2);
|
||||
}
|
||||
#undef DIVUP
|
||||
}
|
||||
|
||||
void canny::getEdges_gpu(oclMat& map, oclMat& dst, int rows, int cols)
|
||||
{
|
||||
Context *clCxt = map.clCxt;
|
||||
string kernelName = "getEdges";
|
||||
vector< pair<size_t, const void *> > args;
|
||||
Context *clCxt = map.clCxt;
|
||||
string kernelName = "getEdges";
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset));
|
||||
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
#endif // HAVE_OPENCL
|
||||
|
@@ -67,7 +67,9 @@ namespace cv
|
||||
|
||||
void cv::ocl::columnSum(const oclMat& src,oclMat& dst)
|
||||
{
|
||||
CV_Assert(src.type() == CV_32FC1 && dst.type() == CV_32FC1 && src.size() == dst.size());
|
||||
CV_Assert(src.type() == CV_32FC1);
|
||||
|
||||
dst.create(src.size(), src.type());
|
||||
|
||||
Context *clCxt = src.clCxt;
|
||||
|
||||
|
@@ -119,6 +119,8 @@ namespace cv { namespace ocl { namespace device
|
||||
float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma);
|
||||
void compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat& img,
|
||||
float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma);
|
||||
|
||||
void resize( const oclMat &src, oclMat &dst, const Size sz);
|
||||
}
|
||||
}}}
|
||||
|
||||
@@ -150,6 +152,8 @@ cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size blo
|
||||
|
||||
cv::Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
|
||||
hog::set_up_constants(nbins, block_stride.width, block_stride.height, blocks_per_win.width, blocks_per_win.height);
|
||||
|
||||
effect_size = Size(0, 0);
|
||||
}
|
||||
|
||||
size_t cv::ocl::HOGDescriptor::getDescriptorSize() const
|
||||
@@ -199,22 +203,37 @@ void cv::ocl::HOGDescriptor::setSVMDetector(const vector<float>& _detector)
|
||||
CV_Assert(checkDetectorSize());
|
||||
}
|
||||
|
||||
void cv::ocl::HOGDescriptor::init_buffer(const oclMat& img, Size win_stride)
|
||||
{
|
||||
if (!image_scale.empty())
|
||||
return;
|
||||
|
||||
if (effect_size == Size(0, 0))
|
||||
effect_size = img.size();
|
||||
|
||||
grad.create(img.size(), CV_32FC2);
|
||||
qangle.create(img.size(), CV_8UC2);
|
||||
|
||||
const size_t block_hist_size = getBlockHistogramSize();
|
||||
const Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);
|
||||
block_hists.create(1, static_cast<int>(block_hist_size * blocks_per_img.area()), CV_32F);
|
||||
|
||||
Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
|
||||
labels.create(1, wins_per_img.area(), CV_8U);
|
||||
}
|
||||
|
||||
void cv::ocl::HOGDescriptor::computeGradient(const oclMat& img, oclMat& grad, oclMat& qangle)
|
||||
{
|
||||
CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
|
||||
|
||||
grad.create(img.size(), CV_32FC2);
|
||||
|
||||
qangle.create(img.size(), CV_8UC2);
|
||||
|
||||
float angleScale = (float)(nbins / CV_PI);
|
||||
switch (img.type())
|
||||
{
|
||||
case CV_8UC1:
|
||||
hog::compute_gradients_8UC1(img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction);
|
||||
hog::compute_gradients_8UC1(effect_size.height, effect_size.width, img, angleScale, grad, qangle, gamma_correction);
|
||||
break;
|
||||
case CV_8UC4:
|
||||
hog::compute_gradients_8UC4(img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction);
|
||||
hog::compute_gradients_8UC4(effect_size.height, effect_size.width, img, angleScale, grad, qangle, gamma_correction);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -224,14 +243,11 @@ void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat& img)
|
||||
{
|
||||
computeGradient(img, grad, qangle);
|
||||
|
||||
size_t block_hist_size = getBlockHistogramSize();
|
||||
Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);
|
||||
hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width,
|
||||
grad, qangle, (float)getWinSigma(), block_hists);
|
||||
|
||||
block_hists.create(1, static_cast<int>(block_hist_size * blocks_per_img.area()), CV_32F);
|
||||
|
||||
hog::compute_hists(nbins, block_stride.width, block_stride.height, img.rows, img.cols, grad, qangle, (float)getWinSigma(), block_hists);
|
||||
|
||||
hog::normalize_hists(nbins, block_stride.width, block_stride.height, img.rows, img.cols, block_hists, (float)threshold_L2hys);
|
||||
hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width,
|
||||
block_hists, (float)threshold_L2hys);
|
||||
}
|
||||
|
||||
|
||||
@@ -239,11 +255,13 @@ void cv::ocl::HOGDescriptor::getDescriptors(const oclMat& img, Size win_stride,
|
||||
{
|
||||
CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
|
||||
|
||||
init_buffer(img, win_stride);
|
||||
|
||||
computeBlockHistograms(img);
|
||||
|
||||
const size_t block_hist_size = getBlockHistogramSize();
|
||||
Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
|
||||
Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
|
||||
Size wins_per_img = numPartsWithin(effect_size, win_size, win_stride);
|
||||
|
||||
descriptors.create(wins_per_img.area(), static_cast<int>(blocks_per_win.area() * block_hist_size), CV_32F);
|
||||
|
||||
@@ -251,11 +269,11 @@ void cv::ocl::HOGDescriptor::getDescriptors(const oclMat& img, Size win_stride,
|
||||
{
|
||||
case DESCR_FORMAT_ROW_BY_ROW:
|
||||
hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width,
|
||||
win_stride.height, win_stride.width, img.rows, img.cols, block_hists, descriptors);
|
||||
win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors);
|
||||
break;
|
||||
case DESCR_FORMAT_COL_BY_COL:
|
||||
hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width,
|
||||
win_stride.height, win_stride.width, img.rows, img.cols, block_hists, descriptors);
|
||||
win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors);
|
||||
break;
|
||||
default:
|
||||
CV_Error(CV_StsBadArg, "Unknown descriptor format");
|
||||
@@ -272,22 +290,21 @@ void cv::ocl::HOGDescriptor::detect(const oclMat& img, vector<Point>& hits, doub
|
||||
if (detector.empty())
|
||||
return;
|
||||
|
||||
computeBlockHistograms(img);
|
||||
|
||||
if (win_stride == Size())
|
||||
win_stride = block_stride;
|
||||
else
|
||||
CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
|
||||
init_buffer(img, win_stride);
|
||||
|
||||
Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
|
||||
labels.create(1, wins_per_img.area(), CV_8U);
|
||||
computeBlockHistograms(img);
|
||||
|
||||
hog::classify_hists(win_size.height, win_size.width, block_stride.height, block_stride.width,
|
||||
win_stride.height, win_stride.width, img.rows, img.cols, block_hists,
|
||||
win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists,
|
||||
detector, (float)free_coef, (float)hit_threshold, labels);
|
||||
|
||||
labels.download(labels_host);
|
||||
unsigned char* vec = labels_host.ptr();
|
||||
Size wins_per_img = numPartsWithin(effect_size, win_size, win_stride);
|
||||
for (int i = 0; i < wins_per_img.area(); i++)
|
||||
{
|
||||
int y = i / wins_per_img.width;
|
||||
@@ -303,6 +320,7 @@ void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat& img, vector<Rect>& f
|
||||
Size win_stride, Size padding, double scale0, int group_threshold)
|
||||
{
|
||||
CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
|
||||
CV_Assert(scale0 > 1);
|
||||
|
||||
vector<double> level_scale;
|
||||
double scale = 1.;
|
||||
@@ -318,27 +336,30 @@ void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat& img, vector<Rect>& f
|
||||
}
|
||||
levels = std::max(levels, 1);
|
||||
level_scale.resize(levels);
|
||||
image_scales.resize(levels);
|
||||
|
||||
std::vector<Rect> all_candidates;
|
||||
vector<Point> locations;
|
||||
|
||||
if (win_stride == Size())
|
||||
win_stride = block_stride;
|
||||
else
|
||||
CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
|
||||
init_buffer(img, win_stride);
|
||||
image_scale.create(img.size(), img.type());
|
||||
|
||||
for (size_t i = 0; i < level_scale.size(); i++)
|
||||
{
|
||||
scale = level_scale[i];
|
||||
Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale));
|
||||
oclMat smaller_img;
|
||||
|
||||
if (sz == img.size())
|
||||
smaller_img = img;
|
||||
effect_size = Size(cvRound(img.cols / scale), cvRound(img.rows / scale));
|
||||
if (effect_size == img.size())
|
||||
{
|
||||
detect(img, locations, hit_threshold, win_stride, padding);
|
||||
}
|
||||
else
|
||||
{
|
||||
image_scales[i].create(sz, img.type());
|
||||
resize(img, image_scales[i], image_scales[i].size(), 0, 0, INTER_LINEAR);
|
||||
smaller_img = image_scales[i];
|
||||
hog::resize( img, image_scale, effect_size);
|
||||
detect(image_scale, locations, hit_threshold, win_stride, padding);
|
||||
}
|
||||
|
||||
detect(smaller_img, locations, hit_threshold, win_stride, padding);
|
||||
Size scaled_win_size(cvRound(win_size.width * scale), cvRound(win_size.height * scale));
|
||||
for (size_t j = 0; j < locations.size(); j++)
|
||||
all_candidates.push_back(Rect(Point2d((CvPoint)locations[j]) * scale, scaled_win_size));
|
||||
@@ -1784,4 +1805,36 @@ void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const c
|
||||
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
void cv::ocl::device::hog::resize( const oclMat &src, oclMat &dst, const Size sz)
|
||||
{
|
||||
CV_Assert( (src.channels() == dst.channels()) );
|
||||
Context *clCxt = Context::getContext();
|
||||
|
||||
string kernelName = (src.type() == CV_8UC1) ? "resize_8UC1_kernel" : "resize_8UC4_kernel";
|
||||
size_t blkSizeX = 16, blkSizeY = 16;
|
||||
size_t glbSizeX = sz.width % blkSizeX == 0 ? sz.width : (sz.width / blkSizeX + 1) * blkSizeX;
|
||||
size_t glbSizeY = sz.height % blkSizeY == 0 ? sz.height : (sz.height / blkSizeY + 1) * blkSizeY;
|
||||
size_t globalThreads[3] = {glbSizeX, glbSizeY, 1};
|
||||
size_t localThreads[3] = {blkSizeX, blkSizeY, 1};
|
||||
|
||||
float ifx = (float)src.cols / sz.width;
|
||||
float ify = (float)src.rows / sz.height;
|
||||
|
||||
vector< pair<size_t, const void *> > args;
|
||||
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
|
||||
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&src.step));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&sz.width));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&sz.height));
|
||||
args.push_back( make_pair(sizeof(cl_float), (void *)&ifx));
|
||||
args.push_back( make_pair(sizeof(cl_float), (void *)&ify));
|
||||
|
||||
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -67,32 +67,6 @@ __kernel void BlendLinear_C1_D0(
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void BlendLinear_C3_D0(
|
||||
__global uchar *dst,
|
||||
__global uchar *img1,
|
||||
__global uchar *img2,
|
||||
__global float *weight1,
|
||||
__global float *weight2,
|
||||
int rows,
|
||||
int cols,
|
||||
int istep,
|
||||
int wstep
|
||||
)
|
||||
{
|
||||
int idx = get_global_id(0);
|
||||
int idy = get_global_id(1);
|
||||
int x = idx / 3;
|
||||
int y = idy;
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int pos = idy * istep + idx;
|
||||
int wpos = idy * (wstep /sizeof(float)) + x;
|
||||
float w1 = weight1[wpos];
|
||||
float w2 = weight2[wpos];
|
||||
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void BlendLinear_C4_D0(
|
||||
__global uchar *dst,
|
||||
__global uchar *img1,
|
||||
@@ -143,32 +117,6 @@ __kernel void BlendLinear_C1_D5(
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void BlendLinear_C3_D5(
|
||||
__global float *dst,
|
||||
__global float *img1,
|
||||
__global float *img2,
|
||||
__global float *weight1,
|
||||
__global float *weight2,
|
||||
int rows,
|
||||
int cols,
|
||||
int istep,
|
||||
int wstep
|
||||
)
|
||||
{
|
||||
int idx = get_global_id(0);
|
||||
int idy = get_global_id(1);
|
||||
int x = idx / 3;
|
||||
int y = idy;
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int pos = idy * (istep / sizeof(float)) + idx;
|
||||
int wpos = idy * (wstep /sizeof(float)) + x;
|
||||
float w1 = weight1[wpos];
|
||||
float w2 = weight2[wpos];
|
||||
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void BlendLinear_C4_D5(
|
||||
__global float *dst,
|
||||
__global float *img1,
|
||||
@@ -194,3 +142,4 @@ __kernel void BlendLinear_C4_D5(
|
||||
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
|
||||
}
|
||||
}
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -58,17 +58,9 @@
|
||||
// Image read mode
|
||||
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
|
||||
|
||||
#define FLT_EPSILON (1e-15)
|
||||
#define CV_PI_F 3.14159265f
|
||||
|
||||
// print greyscale image to show image layout
|
||||
__kernel void printImage(image2d_t img)
|
||||
{
|
||||
printf("(%d, %d) - %3d \n",
|
||||
get_global_id(0),
|
||||
get_global_id(1),
|
||||
read_imageui(img, (int2)(get_global_id(0), get_global_id(1))).x
|
||||
);
|
||||
}
|
||||
|
||||
// Use integral image to calculate haar wavelets.
|
||||
// N = 2
|
||||
@@ -444,7 +436,6 @@ __kernel
|
||||
float val0 = N9[localLin];
|
||||
if (val0 > c_hessianThreshold)
|
||||
{
|
||||
//printf(\"(%3d, %3d) N9[%3d]=%7.1f val0=%7.1f\\n\", l_x, l_y, localLin - zoff, N9[localLin], val0);
|
||||
// Coordinates for the start of the wavelet in the sum image. There
|
||||
// is some integer division involved, so don't try to simplify this
|
||||
// (cancel out sampleStep) without checking the result is the same
|
||||
@@ -726,6 +717,7 @@ __kernel
|
||||
__global float* featureSize = keypoints + SIZE_ROW * keypoints_step;
|
||||
__global float* featureDir = keypoints + ANGLE_ROW * keypoints_step;
|
||||
|
||||
|
||||
volatile __local float s_X[128];
|
||||
volatile __local float s_Y[128];
|
||||
volatile __local float s_angle[128];
|
||||
@@ -737,6 +729,7 @@ __kernel
|
||||
and building the keypoint descriptor are defined relative to 's' */
|
||||
const float s = featureSize[get_group_id(0)] * 1.2f / 9.0f;
|
||||
|
||||
|
||||
/* To find the dominant orientation, the gradients in x and y are
|
||||
sampled in a circle of radius 6s using wavelets of size 4s.
|
||||
We ensure the gradient wavelet size is even to ensure the
|
||||
@@ -765,16 +758,18 @@ __kernel
|
||||
Y = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NY, 4, grad_wav_size, y, x);
|
||||
|
||||
angle = atan2(Y, X);
|
||||
|
||||
if (angle < 0)
|
||||
angle += 2.0f * CV_PI_F;
|
||||
angle *= 180.0f / CV_PI_F;
|
||||
|
||||
}
|
||||
}
|
||||
s_X[tid] = X;
|
||||
s_Y[tid] = Y;
|
||||
s_angle[tid] = angle;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
|
||||
float bestx = 0, besty = 0, best_mod = 0;
|
||||
|
||||
#pragma unroll
|
||||
@@ -807,7 +802,6 @@ __kernel
|
||||
sumx += s_X[get_local_id(0) + 96];
|
||||
sumy += s_Y[get_local_id(0) + 96];
|
||||
}
|
||||
|
||||
reduce_32_sum(s_sumx + get_local_id(1) * 32, sumx, get_local_id(0));
|
||||
reduce_32_sum(s_sumy + get_local_id(1) * 32, sumy, get_local_id(0));
|
||||
|
||||
@@ -818,10 +812,8 @@ __kernel
|
||||
bestx = sumx;
|
||||
besty = sumy;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
if (get_local_id(0) == 0)
|
||||
{
|
||||
s_X[get_local_id(1)] = bestx;
|
||||
@@ -846,6 +838,10 @@ __kernel
|
||||
kp_dir += 2.0f * CV_PI_F;
|
||||
kp_dir *= 180.0f / CV_PI_F;
|
||||
|
||||
kp_dir = 360.0f - kp_dir;
|
||||
if (fabs(kp_dir - 360.f) < FLT_EPSILON)
|
||||
kp_dir = 0.f;
|
||||
|
||||
featureDir[get_group_id(0)] = kp_dir;
|
||||
}
|
||||
}
|
||||
@@ -940,7 +936,10 @@ void calc_dx_dy(
|
||||
const float centerX = featureX[get_group_id(0)];
|
||||
const float centerY = featureY[get_group_id(0)];
|
||||
const float size = featureSize[get_group_id(0)];
|
||||
const float descriptor_dir = featureDir[get_group_id(0)] * (float)(CV_PI_F / 180.0f);
|
||||
float descriptor_dir = 360.0f - featureDir[get_group_id(0)];
|
||||
if (fabs(descriptor_dir - 360.f) < FLT_EPSILON)
|
||||
descriptor_dir = 0.f;
|
||||
descriptor_dir *= (float)(CV_PI_F / 180.0f);
|
||||
|
||||
/* The sampling intervals and wavelet sized for selecting an orientation
|
||||
and building the keypoint descriptor are defined relative to 's' */
|
||||
|
@@ -448,3 +448,42 @@ __kernel void compute_gradients_8UC1_kernel(const int height, const int width, c
|
||||
grad[ ((gidY * grad_quadstep + x) << 1) + 1 ] = mag * ang;
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// Resize
|
||||
|
||||
__kernel void resize_8UC4_kernel(__global uchar4 * dst, __global const uchar4 * src,
|
||||
int dst_offset, int src_offset, int dst_step, int src_step,
|
||||
int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
|
||||
{
|
||||
int dx = get_global_id(0);
|
||||
int dy = get_global_id(1);
|
||||
|
||||
int sx = (int)floor(dx*ifx+0.5f);
|
||||
int sy = (int)floor(dy*ify+0.5f);
|
||||
sx = min(sx, src_cols-1);
|
||||
sy = min(sy, src_rows-1);
|
||||
int dpos = (dst_offset>>2) + dy * (dst_step>>2) + dx;
|
||||
int spos = (src_offset>>2) + sy * (src_step>>2) + sx;
|
||||
|
||||
if(dx<dst_cols && dy<dst_rows)
|
||||
dst[dpos] = src[spos];
|
||||
}
|
||||
|
||||
__kernel void resize_8UC1_kernel(__global uchar * dst, __global const uchar * src,
|
||||
int dst_offset, int src_offset, int dst_step, int src_step,
|
||||
int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
|
||||
{
|
||||
int dx = get_global_id(0);
|
||||
int dy = get_global_id(1);
|
||||
|
||||
int sx = (int)floor(dx*ifx+0.5f);
|
||||
int sy = (int)floor(dy*ify+0.5f);
|
||||
sx = min(sx, src_cols-1);
|
||||
sy = min(sy, src_rows-1);
|
||||
int dpos = dst_offset + dy * dst_step + dx;
|
||||
int spos = src_offset + sy * src_step + sx;
|
||||
|
||||
if(dx<dst_cols && dy<dst_rows)
|
||||
dst[dpos] = src[spos];
|
||||
}
|
@@ -51,510 +51,458 @@ using namespace cv;
|
||||
using namespace cv::ocl;
|
||||
using namespace std;
|
||||
|
||||
#define EXT_FP64 0
|
||||
|
||||
#if !defined (HAVE_OPENCL)
|
||||
void cv::ocl::matchTemplate(const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); }
|
||||
#else
|
||||
//helper routines
|
||||
namespace cv
|
||||
{
|
||||
namespace ocl
|
||||
{
|
||||
///////////////////////////OpenCL kernel strings///////////////////////////
|
||||
extern const char *match_template;
|
||||
}
|
||||
namespace ocl
|
||||
{
|
||||
///////////////////////////OpenCL kernel strings///////////////////////////
|
||||
extern const char *match_template;
|
||||
}
|
||||
}
|
||||
|
||||
namespace cv { namespace ocl
|
||||
{
|
||||
void matchTemplate_SQDIFF(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
|
||||
void matchTemplate_SQDIFF(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
|
||||
|
||||
void matchTemplate_SQDIFF_NORMED(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
|
||||
void matchTemplate_SQDIFF_NORMED(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
|
||||
|
||||
void matchTemplate_CCORR(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
|
||||
void matchTemplate_CCORR(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
|
||||
|
||||
void matchTemplate_CCORR_NORMED(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
|
||||
void matchTemplate_CCORR_NORMED(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
|
||||
|
||||
void matchTemplate_CCOFF(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
|
||||
void matchTemplate_CCOFF(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
|
||||
|
||||
void matchTemplate_CCOFF_NORMED(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
|
||||
void matchTemplate_CCOFF_NORMED(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
|
||||
|
||||
|
||||
void matchTemplateNaive_SQDIFF(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, int cn);
|
||||
void matchTemplateNaive_SQDIFF(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, int cn);
|
||||
|
||||
void matchTemplateNaive_CCORR(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, int cn);
|
||||
void matchTemplateNaive_CCORR(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, int cn);
|
||||
|
||||
// Evaluates optimal template's area threshold. If
|
||||
// template's area is less than the threshold, we use naive match
|
||||
// template version, otherwise FFT-based (if available)
|
||||
int getTemplateThreshold(int method, int depth)
|
||||
{
|
||||
switch (method)
|
||||
{
|
||||
case CV_TM_CCORR:
|
||||
if (depth == CV_32F) return 250;
|
||||
if (depth == CV_8U) return 300;
|
||||
break;
|
||||
case CV_TM_SQDIFF:
|
||||
if (depth == CV_32F) return 0x7fffffff; // do naive SQDIFF for CV_32F
|
||||
if (depth == CV_8U) return 300;
|
||||
break;
|
||||
}
|
||||
CV_Error(CV_StsBadArg, "getTemplateThreshold: unsupported match template mode");
|
||||
return 0;
|
||||
}
|
||||
// Evaluates optimal template's area threshold. If
|
||||
// template's area is less than the threshold, we use naive match
|
||||
// template version, otherwise FFT-based (if available)
|
||||
int getTemplateThreshold(int method, int depth)
|
||||
{
|
||||
switch (method)
|
||||
{
|
||||
case CV_TM_CCORR:
|
||||
if (depth == CV_32F) return 250;
|
||||
if (depth == CV_8U) return 300;
|
||||
break;
|
||||
case CV_TM_SQDIFF:
|
||||
if (depth == CV_32F) return 0x7fffffff; // do naive SQDIFF for CV_32F
|
||||
if (depth == CV_8U) return 300;
|
||||
break;
|
||||
}
|
||||
CV_Error(CV_StsBadArg, "getTemplateThreshold: unsupported match template mode");
|
||||
return 0;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// SQDIFF
|
||||
void matchTemplate_SQDIFF(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
|
||||
{
|
||||
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
|
||||
if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
|
||||
{
|
||||
matchTemplateNaive_SQDIFF(image, templ, result, image.channels());
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
// TODO
|
||||
CV_Error(CV_StsBadArg, "Not supported yet for this size template");
|
||||
}
|
||||
}
|
||||
|
||||
void matchTemplate_SQDIFF_NORMED(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
|
||||
{
|
||||
matchTemplate_CCORR(image,templ,result,buf);
|
||||
buf.image_sums.resize(1);
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// SQDIFF
|
||||
void matchTemplate_SQDIFF(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
|
||||
{
|
||||
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
|
||||
if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
|
||||
{
|
||||
matchTemplateNaive_SQDIFF(image, templ, result, image.channels());
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
// TODO
|
||||
CV_Error(CV_StsBadArg, "Not supported yet for this size template");
|
||||
}
|
||||
}
|
||||
integral(image.reshape(1), buf.image_sums[0]);
|
||||
|
||||
void matchTemplate_SQDIFF_NORMED(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
|
||||
{
|
||||
matchTemplate_CCORR(image,templ,result,buf);
|
||||
buf.image_sums.resize(1);
|
||||
buf.image_sqsums.resize(1);
|
||||
|
||||
integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
|
||||
|
||||
#if EXT_FP64 && SQRSUM_FIXED
|
||||
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
|
||||
#else
|
||||
Mat sqr_mat = templ.reshape(1);
|
||||
unsigned long long templ_sqsum = (unsigned long long)sum(sqr_mat.mul(sqr_mat))[0];
|
||||
#endif
|
||||
|
||||
Context *clCxt = image.clCxt;
|
||||
string kernelName = "matchTemplate_Prepared_SQDIFF_NORMED";
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
|
||||
args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||
|
||||
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
||||
size_t localThreads[3] = {32, 8, 1};
|
||||
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
|
||||
}
|
||||
|
||||
void matchTemplateNaive_SQDIFF(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, int cn)
|
||||
{
|
||||
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
|
||||
|| (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F);
|
||||
CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
|
||||
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
|
||||
|
||||
Context *clCxt = image.clCxt;
|
||||
string kernelName = "matchTemplate_Naive_SQDIFF";
|
||||
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||
|
||||
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
||||
size_t localThreads[3] = {32, 8, 1};
|
||||
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// CCORR
|
||||
void matchTemplate_CCORR(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
|
||||
{
|
||||
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
|
||||
if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
|
||||
{
|
||||
matchTemplateNaive_CCORR(image, templ, result, image.channels());
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Error(CV_StsBadArg, "Not supported yet for this size template");
|
||||
if(image.depth() == CV_8U && templ.depth() == CV_8U)
|
||||
{
|
||||
image.convertTo(buf.imagef, CV_32F);
|
||||
templ.convertTo(buf.templf, CV_32F);
|
||||
}
|
||||
CV_Assert(image.channels() == 1);
|
||||
oclMat o_result(image.size(), CV_MAKETYPE(CV_32F, image.channels()));
|
||||
filter2D(buf.imagef,o_result,CV_32F,buf.templf, Point(0,0));
|
||||
result = o_result(Rect(0,0,image.rows - templ.rows + 1, image.cols - templ.cols + 1));
|
||||
}
|
||||
}
|
||||
|
||||
void matchTemplate_CCORR_NORMED(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
|
||||
{
|
||||
matchTemplate_CCORR(image,templ,result,buf);
|
||||
buf.image_sums.resize(1);
|
||||
buf.image_sqsums.resize(1);
|
||||
|
||||
integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
|
||||
#if EXT_FP64 && SQRSUM_FIXED
|
||||
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
|
||||
#elif EXT_FP64
|
||||
oclMat templ_c1 = templ.reshape(1);
|
||||
multiply(templ_c1, templ_c1, templ_c1);
|
||||
unsigned long long templ_sqsum = (unsigned long long)sum(templ_c1)[0];
|
||||
#else
|
||||
Mat m_templ_c1 = templ.reshape(1);
|
||||
multiply(m_templ_c1, m_templ_c1, m_templ_c1);
|
||||
unsigned long long templ_sqsum = (unsigned long long)sum(m_templ_c1)[0];
|
||||
#endif
|
||||
Context *clCxt = image.clCxt;
|
||||
string kernelName = "normalizeKernel";
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
|
||||
args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||
|
||||
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
||||
size_t localThreads[3] = {32, 8, 1};
|
||||
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
|
||||
}
|
||||
|
||||
void matchTemplateNaive_CCORR(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, int cn)
|
||||
{
|
||||
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
|
||||
|| (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F);
|
||||
CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
|
||||
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
|
||||
|
||||
Context *clCxt = image.clCxt;
|
||||
string kernelName = "matchTemplate_Naive_CCORR";
|
||||
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||
|
||||
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
||||
size_t localThreads[3] = {32, 8, 1};
|
||||
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// CCOFF
|
||||
void matchTemplate_CCOFF(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
|
||||
{
|
||||
CV_Assert(image.depth() == CV_8U && templ.depth() == CV_8U);
|
||||
|
||||
matchTemplate_CCORR(image,templ,result,buf);
|
||||
|
||||
Context *clCxt = image.clCxt;
|
||||
string kernelName;
|
||||
|
||||
kernelName = "matchTemplate_Prepared_CCOFF";
|
||||
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
||||
size_t localThreads[3] = {32, 8, 1};
|
||||
|
||||
vector< pair<size_t, const void *> > args;
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||
// to be continued in the following section
|
||||
if(image.channels() == 1)
|
||||
{
|
||||
buf.image_sums.resize(1);
|
||||
// FIXME: temp fix for incorrect integral kernel
|
||||
oclMat tmp_oclmat;
|
||||
integral(image, buf.image_sums[0], tmp_oclmat);
|
||||
|
||||
float templ_sum = 0;
|
||||
#if EXT_FP64
|
||||
templ_sum = (float)sum(templ)[0] / templ.size().area();
|
||||
#else
|
||||
Mat o_templ = templ;
|
||||
templ_sum = (float)sum(o_templ)[0] / o_templ.size().area(); // temp fix for non-double supported machine
|
||||
#endif
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) );
|
||||
}
|
||||
else
|
||||
{
|
||||
Vec4f templ_sum = Vec4f::all(0);
|
||||
#if EXT_FP64
|
||||
split(image,buf.images);
|
||||
templ_sum = sum(templ) / templ.size().area();
|
||||
#else
|
||||
// temp fix for non-double supported machine
|
||||
Mat o_templ = templ, o_image = image;
|
||||
vector<Mat> o_mat_vector;
|
||||
o_mat_vector.resize(image.channels());
|
||||
buf.images.resize(image.channels());
|
||||
split(o_image, o_mat_vector);
|
||||
for(int i = 0; i < o_mat_vector.size(); i ++)
|
||||
{
|
||||
buf.images[i] = oclMat(o_mat_vector[i]);
|
||||
}
|
||||
templ_sum = sum(o_templ) / templ.size().area();
|
||||
#endif
|
||||
buf.image_sums.resize(buf.images.size());
|
||||
|
||||
for(int i = 0; i < image.channels(); i ++)
|
||||
{
|
||||
// FIXME: temp fix for incorrect integral kernel
|
||||
oclMat omat_temp;
|
||||
integral(buf.images[i], buf.image_sums[i], omat_temp);
|
||||
}
|
||||
switch(image.channels())
|
||||
{
|
||||
case 4:
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) );
|
||||
break;
|
||||
default:
|
||||
CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
|
||||
break;
|
||||
}
|
||||
}
|
||||
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
|
||||
}
|
||||
|
||||
void matchTemplate_CCOFF_NORMED(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
|
||||
{
|
||||
image.convertTo(buf.imagef, CV_32F);
|
||||
templ.convertTo(buf.templf, CV_32F);
|
||||
|
||||
matchTemplate_CCORR(buf.imagef, buf.templf, result, buf);
|
||||
float scale = 1.f/templ.size().area();
|
||||
|
||||
Context *clCxt = image.clCxt;
|
||||
string kernelName;
|
||||
|
||||
kernelName = "matchTemplate_Prepared_CCOFF_NORMED";
|
||||
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
||||
size_t localThreads[3] = {32, 8, 1};
|
||||
|
||||
vector< pair<size_t, const void *> > args;
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&scale) );
|
||||
// to be continued in the following section
|
||||
if(image.channels() == 1)
|
||||
{
|
||||
buf.image_sums.resize(1);
|
||||
buf.image_sqsums.resize(1);
|
||||
integral(image, buf.image_sums[0], buf.image_sqsums[0]);
|
||||
float templ_sum = 0;
|
||||
float templ_sqsum = 0;
|
||||
#if EXT_FP64
|
||||
templ_sum = (float)sum(templ)[0];
|
||||
#if SQRSUM_FIXED
|
||||
templ_sqsum = sqrSum(templ);
|
||||
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
|
||||
#else
|
||||
oclMat templ_sqr = templ;
|
||||
multiply(templ,templ, templ_sqr);
|
||||
templ_sqsum = sum(templ_sqr)[0];
|
||||
#endif //SQRSUM_FIXED
|
||||
templ_sqsum -= scale * templ_sum * templ_sum;
|
||||
templ_sum *= scale;
|
||||
#else
|
||||
// temp fix for non-double supported machine
|
||||
Mat o_templ = templ;
|
||||
templ_sum = (float)sum(o_templ)[0];
|
||||
templ_sqsum = sum(o_templ.mul(o_templ))[0] - scale * templ_sum * templ_sum;
|
||||
templ_sum *= scale;
|
||||
Mat sqr_mat = templ.reshape(1);
|
||||
unsigned long long templ_sqsum = (unsigned long long)sum(sqr_mat.mul(sqr_mat))[0];
|
||||
#endif
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum) );
|
||||
}
|
||||
else
|
||||
{
|
||||
Vec4f templ_sum = Vec4f::all(0);
|
||||
Vec4f templ_sqsum = Vec4f::all(0);
|
||||
#if EXT_FP64
|
||||
split(image,buf.images);
|
||||
templ_sum = sum(templ);
|
||||
|
||||
Context *clCxt = image.clCxt;
|
||||
string kernelName = "matchTemplate_Prepared_SQDIFF_NORMED";
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
|
||||
args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||
|
||||
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
||||
size_t localThreads[3] = {32, 8, 1};
|
||||
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
|
||||
}
|
||||
|
||||
void matchTemplateNaive_SQDIFF(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, int cn)
|
||||
{
|
||||
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
|
||||
|| (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F);
|
||||
CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
|
||||
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
|
||||
|
||||
Context *clCxt = image.clCxt;
|
||||
string kernelName = "matchTemplate_Naive_SQDIFF";
|
||||
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||
|
||||
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
||||
size_t localThreads[3] = {32, 8, 1};
|
||||
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// CCORR
|
||||
void matchTemplate_CCORR(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
|
||||
{
|
||||
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
|
||||
if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
|
||||
{
|
||||
matchTemplateNaive_CCORR(image, templ, result, image.channels());
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Error(CV_StsBadArg, "Not supported yet for this size template");
|
||||
if(image.depth() == CV_8U && templ.depth() == CV_8U)
|
||||
{
|
||||
image.convertTo(buf.imagef, CV_32F);
|
||||
templ.convertTo(buf.templf, CV_32F);
|
||||
}
|
||||
CV_Assert(image.channels() == 1);
|
||||
oclMat o_result(image.size(), CV_MAKETYPE(CV_32F, image.channels()));
|
||||
filter2D(buf.imagef,o_result,CV_32F,buf.templf, Point(0,0));
|
||||
result = o_result(Rect(0,0,image.rows - templ.rows + 1, image.cols - templ.cols + 1));
|
||||
}
|
||||
}
|
||||
|
||||
void matchTemplate_CCORR_NORMED(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
|
||||
{
|
||||
matchTemplate_CCORR(image,templ,result,buf);
|
||||
buf.image_sums.resize(1);
|
||||
buf.image_sqsums.resize(1);
|
||||
|
||||
integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
|
||||
#if SQRSUM_FIXED
|
||||
templ_sqsum = sqrSum(templ);
|
||||
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
|
||||
#else
|
||||
oclMat templ_sqr = templ;
|
||||
multiply(templ,templ, templ_sqr);
|
||||
templ_sqsum = sum(templ_sqr);
|
||||
#endif //SQRSUM_FIXED
|
||||
templ_sqsum -= scale * templ_sum * templ_sum;
|
||||
|
||||
#else
|
||||
// temp fix for non-double supported machine
|
||||
Mat o_templ = templ, o_image = image;
|
||||
|
||||
vector<Mat> o_mat_vector;
|
||||
o_mat_vector.resize(image.channels());
|
||||
buf.images.resize(image.channels());
|
||||
split(o_image, o_mat_vector);
|
||||
for(int i = 0; i < o_mat_vector.size(); i ++)
|
||||
{
|
||||
buf.images[i] = oclMat(o_mat_vector[i]);
|
||||
}
|
||||
templ_sum = sum(o_templ);
|
||||
templ_sqsum = sum(o_templ.mul(o_templ));
|
||||
oclMat templ_c1 = templ.reshape(1);
|
||||
multiply(templ_c1, templ_c1, templ_c1);
|
||||
unsigned long long templ_sqsum = (unsigned long long)sum(templ_c1)[0];
|
||||
#endif
|
||||
float templ_sqsum_sum = 0;
|
||||
for(int i = 0; i < image.channels(); i ++)
|
||||
{
|
||||
templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
|
||||
}
|
||||
templ_sum *= scale;
|
||||
buf.image_sums.resize(buf.images.size());
|
||||
buf.image_sqsums.resize(buf.images.size());
|
||||
|
||||
for(int i = 0; i < image.channels(); i ++)
|
||||
{
|
||||
integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]);
|
||||
}
|
||||
|
||||
switch(image.channels())
|
||||
{
|
||||
case 4:
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[1].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[2].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[3].data) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum_sum) );
|
||||
break;
|
||||
default:
|
||||
CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
|
||||
break;
|
||||
}
|
||||
}
|
||||
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
|
||||
}
|
||||
Context *clCxt = image.clCxt;
|
||||
string kernelName = "normalizeKernel";
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
|
||||
args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||
|
||||
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
||||
size_t localThreads[3] = {32, 8, 1};
|
||||
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
|
||||
}
|
||||
|
||||
void matchTemplateNaive_CCORR(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, int cn)
|
||||
{
|
||||
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
|
||||
|| (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F);
|
||||
CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
|
||||
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
|
||||
|
||||
Context *clCxt = image.clCxt;
|
||||
string kernelName = "matchTemplate_Naive_CCORR";
|
||||
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||
|
||||
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
||||
size_t localThreads[3] = {32, 8, 1};
|
||||
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// CCOFF
|
||||
void matchTemplate_CCOFF(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
|
||||
{
|
||||
CV_Assert(image.depth() == CV_8U && templ.depth() == CV_8U);
|
||||
|
||||
matchTemplate_CCORR(image,templ,result,buf);
|
||||
|
||||
Context *clCxt = image.clCxt;
|
||||
string kernelName;
|
||||
|
||||
kernelName = "matchTemplate_Prepared_CCOFF";
|
||||
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
||||
size_t localThreads[3] = {32, 8, 1};
|
||||
|
||||
vector< pair<size_t, const void *> > args;
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||
// to be continued in the following section
|
||||
if(image.channels() == 1)
|
||||
{
|
||||
buf.image_sums.resize(1);
|
||||
integral(image, buf.image_sums[0]);
|
||||
|
||||
float templ_sum = 0;
|
||||
templ_sum = (float)sum(templ)[0] / templ.size().area();
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) );
|
||||
}
|
||||
else
|
||||
{
|
||||
Vec4f templ_sum = Vec4f::all(0);
|
||||
split(image,buf.images);
|
||||
templ_sum = sum(templ) / templ.size().area();
|
||||
buf.image_sums.resize(buf.images.size());
|
||||
|
||||
for(int i = 0; i < image.channels(); i ++)
|
||||
{
|
||||
integral(buf.images[i], buf.image_sums[i]);
|
||||
}
|
||||
switch(image.channels())
|
||||
{
|
||||
case 4:
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) );
|
||||
break;
|
||||
default:
|
||||
CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
|
||||
break;
|
||||
}
|
||||
}
|
||||
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
|
||||
}
|
||||
|
||||
void matchTemplate_CCOFF_NORMED(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
|
||||
{
|
||||
image.convertTo(buf.imagef, CV_32F);
|
||||
templ.convertTo(buf.templf, CV_32F);
|
||||
|
||||
matchTemplate_CCORR(buf.imagef, buf.templf, result, buf);
|
||||
float scale = 1.f/templ.size().area();
|
||||
|
||||
Context *clCxt = image.clCxt;
|
||||
string kernelName;
|
||||
|
||||
kernelName = "matchTemplate_Prepared_CCOFF_NORMED";
|
||||
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
||||
size_t localThreads[3] = {32, 8, 1};
|
||||
|
||||
vector< pair<size_t, const void *> > args;
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&scale) );
|
||||
// to be continued in the following section
|
||||
if(image.channels() == 1)
|
||||
{
|
||||
buf.image_sums.resize(1);
|
||||
buf.image_sqsums.resize(1);
|
||||
integral(image, buf.image_sums[0], buf.image_sqsums[0]);
|
||||
float templ_sum = 0;
|
||||
float templ_sqsum = 0;
|
||||
templ_sum = (float)sum(templ)[0];
|
||||
#if SQRSUM_FIXED
|
||||
templ_sqsum = sqrSum(templ)[0];
|
||||
#else
|
||||
oclMat templ_sqr = templ;
|
||||
multiply(templ,templ, templ_sqr);
|
||||
templ_sqsum = sum(templ_sqr)[0];
|
||||
#endif //SQRSUM_FIXED
|
||||
templ_sqsum -= scale * templ_sum * templ_sum;
|
||||
templ_sum *= scale;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum) );
|
||||
}
|
||||
else
|
||||
{
|
||||
Vec4f templ_sum = Vec4f::all(0);
|
||||
Vec4f templ_sqsum = Vec4f::all(0);
|
||||
|
||||
split(image,buf.images);
|
||||
templ_sum = sum(templ);
|
||||
#if SQRSUM_FIXED
|
||||
templ_sqsum = sqrSum(templ);
|
||||
#else
|
||||
oclMat templ_sqr = templ;
|
||||
multiply(templ,templ, templ_sqr);
|
||||
templ_sqsum = sum(templ_sqr);
|
||||
#endif //SQRSUM_FIXED
|
||||
templ_sqsum -= scale * templ_sum * templ_sum;
|
||||
|
||||
float templ_sqsum_sum = 0;
|
||||
for(int i = 0; i < image.channels(); i ++)
|
||||
{
|
||||
templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
|
||||
}
|
||||
templ_sum *= scale;
|
||||
buf.image_sums.resize(buf.images.size());
|
||||
buf.image_sqsums.resize(buf.images.size());
|
||||
|
||||
for(int i = 0; i < image.channels(); i ++)
|
||||
{
|
||||
integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]);
|
||||
}
|
||||
|
||||
switch(image.channels())
|
||||
{
|
||||
case 4:
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[1].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[2].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[3].data) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum_sum) );
|
||||
break;
|
||||
default:
|
||||
CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
|
||||
break;
|
||||
}
|
||||
}
|
||||
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
|
||||
}
|
||||
|
||||
}/*ocl*/} /*cv*/
|
||||
|
||||
void cv::ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method)
|
||||
{
|
||||
MatchTemplateBuf buf;
|
||||
matchTemplate(image,templ, result, method,buf);
|
||||
MatchTemplateBuf buf;
|
||||
matchTemplate(image,templ, result, method,buf);
|
||||
}
|
||||
void cv::ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method, MatchTemplateBuf& buf)
|
||||
{
|
||||
CV_Assert(image.type() == templ.type());
|
||||
CV_Assert(image.cols >= templ.cols && image.rows >= templ.rows);
|
||||
CV_Assert(image.type() == templ.type());
|
||||
CV_Assert(image.cols >= templ.cols && image.rows >= templ.rows);
|
||||
|
||||
typedef void (*Caller)(const oclMat&, const oclMat&, oclMat&, MatchTemplateBuf&);
|
||||
typedef void (*Caller)(const oclMat&, const oclMat&, oclMat&, MatchTemplateBuf&);
|
||||
|
||||
const Caller callers[] = {
|
||||
::matchTemplate_SQDIFF, ::matchTemplate_SQDIFF_NORMED,
|
||||
::matchTemplate_CCORR, ::matchTemplate_CCORR_NORMED,
|
||||
::matchTemplate_CCOFF, ::matchTemplate_CCOFF_NORMED
|
||||
};
|
||||
const Caller callers[] = {
|
||||
::matchTemplate_SQDIFF, ::matchTemplate_SQDIFF_NORMED,
|
||||
::matchTemplate_CCORR, ::matchTemplate_CCORR_NORMED,
|
||||
::matchTemplate_CCOFF, ::matchTemplate_CCOFF_NORMED
|
||||
};
|
||||
|
||||
Caller caller = callers[method];
|
||||
CV_Assert(caller);
|
||||
caller(image, templ, result, buf);
|
||||
Caller caller = callers[method];
|
||||
CV_Assert(caller);
|
||||
caller(image, templ, result, buf);
|
||||
}
|
||||
#endif //
|
||||
|
@@ -1,4 +1,49 @@
|
||||
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Dachuan Zhao, dachuan@multicorewareinc.com
|
||||
// Yao Wang, yao@multicorewareinc.com
|
||||
//
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other oclMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
#include "precomp.hpp"
|
||||
|
||||
using namespace cv;
|
||||
@@ -24,7 +69,6 @@ namespace cv
|
||||
template<typename T>
|
||||
void pyrdown_run(const oclMat &src, const oclMat &dst)
|
||||
{
|
||||
CV_Assert(src.cols / 2 == dst.cols && src.rows / 2 == dst.rows);
|
||||
|
||||
CV_Assert(src.type() == dst.type());
|
||||
CV_Assert(src.depth() != CV_8S);
|
||||
@@ -108,7 +152,7 @@ void cv::ocl::pyrDown(const oclMat& src, oclMat& dst)
|
||||
|
||||
dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());
|
||||
|
||||
//dst.step = dst.rows;
|
||||
dst.download_channels = src.download_channels;
|
||||
|
||||
pyrdown_run(src, dst);
|
||||
}
|
||||
|
@@ -16,6 +16,7 @@
|
||||
//
|
||||
// @Authors
|
||||
// Zhang Chunpeng chunpeng@multicorewareinc.com
|
||||
// Yao Wang, yao@multicorewareinc.com
|
||||
//
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
@@ -61,8 +62,9 @@ namespace cv { namespace ocl
|
||||
{
|
||||
extern const char *pyr_up;
|
||||
void pyrUp(const cv::ocl::oclMat& src,cv::ocl::oclMat& dst)
|
||||
{
|
||||
{
|
||||
dst.create(src.rows * 2, src.cols * 2, src.type());
|
||||
dst.download_channels=src.download_channels;
|
||||
Context *clCxt = src.clCxt;
|
||||
|
||||
const std::string kernelName = "pyrUp";
|
||||
|
@@ -149,8 +149,7 @@ namespace
|
||||
//loadGlobalConstants(maxCandidates, maxFeatures, img_rows, img_cols, surf_.nOctaveLayers, static_cast<float>(surf_.hessianThreshold));
|
||||
|
||||
bindImgTex(img);
|
||||
oclMat integral_sqsum;
|
||||
integral(img, surf_.sum, integral_sqsum); // the two argumented integral version is incorrect
|
||||
integral(img, surf_.sum); // the two argumented integral version is incorrect
|
||||
|
||||
bindSumTex(surf_.sum);
|
||||
maskSumTex = 0;
|
||||
|
Reference in New Issue
Block a user