Remove warnings from some functions in the ocl module

This commit is contained in:
yao 2012-09-20 09:23:11 +08:00
parent 82b30963d2
commit 2e36338636
5 changed files with 267 additions and 340 deletions

View File

@ -75,13 +75,13 @@ cv::ocl::CannyBuf::CannyBuf(const oclMat& dx_, const oclMat& dy_) : dx(dx_), dy(
void cv::ocl::CannyBuf::create(const Size& image_size, int apperture_size)
{
dx.create(image_size, CV_32SC1);
dy.create(image_size, CV_32SC1);
ensureSizeIsEnough(image_size, CV_32SC1, dx);
ensureSizeIsEnough(image_size, CV_32SC1, dy);
if(apperture_size == 3)
{
dx_buf.create(image_size, CV_32SC1);
dy_buf.create(image_size, CV_32SC1);
ensureSizeIsEnough(image_size, CV_32SC1, dx_buf);
ensureSizeIsEnough(image_size, CV_32SC1, dy_buf);
}
else if(apperture_size > 0)
{
@ -95,18 +95,18 @@ void cv::ocl::CannyBuf::create(const Size& image_size, int apperture_size)
filterDY = createDerivFilter_GPU(CV_8U, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE);
}
}
edgeBuf.create(image_size.height + 2, image_size.width + 2, CV_32FC1);
ensureSizeIsEnough(image_size.height + 2, image_size.width + 2, CV_32FC1, edgeBuf);
trackBuf1.create(1, image_size.width * image_size.height, CV_16UC2);
trackBuf2.create(1, image_size.width * image_size.height, CV_16UC2);
ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf1);
ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf2);
float counter_f [1] = { 0 };
int counter_i [1] = { 0 };
int err = 0;
if(counter)
{
openCLFree(counter);
}
counter = clCreateBuffer( Context::getContext()->impl->clContext, CL_MEM_COPY_HOST_PTR, sizeof(float), counter_f, &err );
counter = clCreateBuffer( Context::getContext()->impl->clContext, CL_MEM_COPY_HOST_PTR, sizeof(int), counter_i, &err );
openCLSafeCall(err);
}
@ -357,16 +357,18 @@ void canny::edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, void * counter, i
void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, void * counter, int rows, int cols)
{
unsigned int count;
openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(float), &count, NULL, NULL, NULL));
openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL));
Context *clCxt = map.clCxt;
string kernelName = "edgesHysteresisGlobal";
vector< pair<size_t, const void *> > args;
size_t localThreads[3] = {128, 1, 1};
#define DIVUP(a, b) ((a)+(b)-1)/(b)
int count_i[1] = {0};
while(count > 0)
{
openCLSafeCall(clEnqueueWriteBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(int), &count_i, 0, NULL, NULL));
args.clear();
size_t globalThreads[3] = {std::min(count, 65535u) * 128, DIVUP(count, 65535), 1};
args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
@ -380,7 +382,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, voi
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(float), &count, NULL, NULL, NULL));
openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
std::swap(st1, st2);
}
#undef DIVUP

View File

@ -601,7 +601,7 @@ __kernel
{
int n;
#pragma unroll
#pragma unroll
for (int k = 0; k < 16; ++k)
{
n = 0;
@ -686,12 +686,6 @@ __kernel
__local ushort2 s_st[stack_size];
if(gidx + gidy == 0)
{
*counter = 0;
}
barrier(CLK_GLOBAL_MEM_FENCE);
if(lidx == 0)
{
s_counter = 0;

View File

@ -114,7 +114,7 @@ namespace cv { namespace ocl
//////////////////////////////////////////////////////////////////////
// SQDIFF
void matchTemplate_SQDIFF(
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &)
{
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
@ -167,10 +167,11 @@ namespace cv { namespace ocl
}
void matchTemplateNaive_SQDIFF(
const oclMat& image, const oclMat& templ, oclMat& result, int cn)
const oclMat& image, const oclMat& templ, oclMat& result, int)
{
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
|| (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F);
|| ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
);
CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
@ -263,10 +264,11 @@ namespace cv { namespace ocl
}
void matchTemplateNaive_CCORR(
const oclMat& image, const oclMat& templ, oclMat& result, int cn)
const oclMat& image, const oclMat& templ, oclMat& result, int)
{
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
|| (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F);
|| ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
);
CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
@ -341,6 +343,7 @@ namespace cv { namespace ocl
templ_sum = sum(templ) / templ.size().area();
buf.image_sums.resize(buf.images.size());
for(int i = 0; i < image.channels(); i ++)
{
integral(buf.images[i], buf.image_sums[i]);
@ -408,7 +411,7 @@ namespace cv { namespace ocl
#else
oclMat templ_sqr = templ;
multiply(templ,templ, templ_sqr);
templ_sqsum = sum(templ_sqr)[0];
templ_sqsum = saturate_cast<float>(sum(templ_sqr)[0]);
#endif //SQRSUM_FIXED
templ_sqsum -= scale * templ_sum * templ_sum;
templ_sum *= scale;

View File

@ -44,7 +44,7 @@
//M*/
#include <iomanip>
#include "precomp.hpp"
#include "opencv2/highgui/highgui.hpp"
using namespace cv;
using namespace cv::ocl;
@ -72,14 +72,13 @@ namespace cv { namespace ocl
extern const char * nonfree_surf;
}}
namespace
static inline int divUp(int total, int grain)
{
static inline int divUp(int total, int grain)
{
return (total + grain - 1) / grain;
}
static inline int calcSize(int octave, int layer)
{
}
static inline int calcSize(int octave, int layer)
{
/* Wavelet size at first layer of first octave. */
const int HAAR_SIZE0 = 9;
@ -91,15 +90,13 @@ namespace
const int HAAR_SIZE_INC = 6;
return (HAAR_SIZE0 + HAAR_SIZE_INC * layer) << octave;
}
}
class SURF_OCL_Invoker
{
public:
class SURF_OCL_Invoker
{
public:
// facilities
void bindImgTex(const oclMat& img);
void bindSumTex(const oclMat& sum);
void bindMaskSumTex(const oclMat& maskSum);
void bindImgTex(const oclMat& img, cl_mem & texture);
//void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold);
//void loadOctaveConstants(int octave, int layer_rows, int layer_cols);
@ -149,10 +146,10 @@ namespace
//loadGlobalConstants(maxCandidates, maxFeatures, img_rows, img_cols, surf_.nOctaveLayers, static_cast<float>(surf_.hessianThreshold));
bindImgTex(img);
bindImgTex(img, imgTex);
integral(img, surf_.sum); // the two argumented integral version is incorrect
bindSumTex(surf_.sum);
bindImgTex(surf_.sum, sumTex);
maskSumTex = 0;
if (use_mask)
@ -164,7 +161,7 @@ namespace
temp.setTo(Scalar::all(1.0));
//cv::ocl::min(mask, temp, surf_.mask1); ///////// disable this
integral(surf_.mask1, surf_.maskSum);
bindMaskSumTex(surf_.maskSum);
bindImgTex(surf_.maskSum, maskSumTex);
}
}
@ -172,11 +169,11 @@ namespace
{
// create image pyramid buffers
// different layers have same-sized buffers, but they are sampled from a Gaussian kernel.
surf_.det.create(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1);
surf_.trace.create(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1);
ensureSizeIsEnough(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1, surf_.det);
ensureSizeIsEnough(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1, surf_.trace);
surf_.maxPosBuffer.create(1, maxCandidates, CV_32SC4);
keypoints.create(SURF_OCL::ROWS_COUNT, maxFeatures, CV_32FC1);
ensureSizeIsEnough(1, maxCandidates, CV_32SC4, surf_.maxPosBuffer);
ensureSizeIsEnough(SURF_OCL::ROWS_COUNT, maxFeatures, CV_32FC1, keypoints);
keypoints.setTo(Scalar::all(0));
for (int octave = 0; octave < surf_.nOctaves; ++octave)
@ -225,7 +222,7 @@ namespace
const int nFeatures = keypoints.cols;
if (nFeatures > 0)
{
descriptors.create(nFeatures, descriptorSize, CV_32F);
ensureSizeIsEnough(nFeatures, descriptorSize, CV_32F, descriptors);
compute_descriptors_gpu(descriptors, keypoints, nFeatures);
}
}
@ -241,7 +238,7 @@ namespace
additioalParamBuffer.release();
}
private:
private:
SURF_OCL& surf_;
int img_cols, img_rows;
@ -259,8 +256,13 @@ namespace
cl_mem maskSumTex;
oclMat additioalParamBuffer;
};
}
// Copy assignment is intentionally declared but NOT defined.
// The class holds a reference member (surf_), which cannot be reseated, so a
// meaningful assignment is impossible. A user-declared operator= is still
// enough to silence MSVC warning C4512 ("assignment operator could not be
// generated"). The previous body, `(*this) = right;`, called this same
// operator again and recursed without end; leaving the operator undefined
// turns any accidental use into a build-time error instead.
SURF_OCL_Invoker& operator= (const SURF_OCL_Invoker& right);
};
cv::ocl::SURF_OCL::SURF_OCL()
{
@ -274,7 +276,7 @@ cv::ocl::SURF_OCL::SURF_OCL()
cv::ocl::SURF_OCL::SURF_OCL(double _threshold, int _nOctaves, int _nOctaveLayers, bool _extended, float _keypointsRatio, bool _upright)
{
hessianThreshold = _threshold;
hessianThreshold = saturate_cast<float>(_threshold);
extended = _extended;
nOctaves = _nOctaves;
nOctaveLayers = _nOctaveLayers;
@ -440,150 +442,77 @@ void cv::ocl::SURF_OCL::releaseMemory()
maxPosBuffer.release();
}
// Facilities
//// load SURF constants into device memory
//void SURF_OCL_Invoker::loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold)
//{
// Mat tmp(1, 9, CV_32FC1);
// float * tmp_data = tmp.ptr<float>();
// *tmp_data = maxCandidates;
// *(++tmp_data) = maxFeatures;
// *(++tmp_data) = img_rows;
// *(++tmp_data) = img_cols;
// *(++tmp_data) = nOctaveLayers;
// *(++tmp_data) = hessianThreshold;
// additioalParamBuffer = tmp;
//}
//void SURF_OCL_Invoker::loadOctaveConstants(int octave, int layer_rows, int layer_cols)
//{
// Mat tmp = additioalParamBuffer;
// float * tmp_data = tmp.ptr<float>();
// tmp_data += 6;
// *tmp_data = octave;
// *(++tmp_data) = layer_rows;
// *(++tmp_data) = layer_cols;
// additioalParamBuffer = tmp;
//}
// create and bind source buffer to image object.
void SURF_OCL_Invoker::bindImgTex(const oclMat& img)
// bind source buffer to image object.
void SURF_OCL_Invoker::bindImgTex(const oclMat& img, cl_mem& texture)
{
Mat cpu_img(img); // time consuming
cl_image_format format;
int err;
int depth = img.depth();
int channels = img.channels();
switch(depth)
{
case CV_8U:
format.image_channel_data_type = CL_UNSIGNED_INT8;
format.image_channel_order = CL_R;
if(imgTex)
{
openCLFree(imgTex);
}
#if CL_VERSION_1_2
cl_image_desc desc;
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
desc.image_width = cpu_img.cols;
desc.image_height = cpu_img.rows;
desc.image_depth = NULL;
desc.image_array_size = 1;
desc.image_row_pitch = cpu_img.step;
desc.image_slice_pitch= 0;
desc.buffer = NULL;
desc.num_mip_levels = 0;
desc.num_samples = 0;
imgTex = clCreateImage(img.clCxt->impl->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &format, &desc, cpu_img.data, &err);
#else
imgTex = clCreateImage2D(
img.clCxt->impl->clContext,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
&format,
cpu_img.cols,
cpu_img.rows,
cpu_img.step,
cpu_img.data,
&err);
#endif
openCLSafeCall(err);
}
void SURF_OCL_Invoker::bindSumTex(const oclMat& sum)
{
Mat cpu_img(sum); // time consuming
cl_image_format format;
int err;
break;
case CV_32S:
format.image_channel_data_type = CL_UNSIGNED_INT32;
format.image_channel_order = CL_R;
if(sumTex)
break;
case CV_32F:
format.image_channel_data_type = CL_FLOAT;
break;
default:
throw std::exception();
break;
}
switch(channels)
{
openCLFree(sumTex);
case 1:
format.image_channel_order = CL_R;
break;
case 3:
format.image_channel_order = CL_RGB;
break;
case 4:
format.image_channel_order = CL_RGBA;
break;
default:
throw std::exception();
break;
}
if(texture)
{
openCLFree(texture);
}
#if CL_VERSION_1_2
cl_image_desc desc;
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
desc.image_width = cpu_img.cols;
desc.image_height = cpu_img.rows;
desc.image_depth = NULL;
desc.image_width = img.step / img.elemSize();
desc.image_height = img.rows;
desc.image_depth = 0;
desc.image_array_size = 1;
desc.image_row_pitch = cpu_img.step;
desc.image_row_pitch = 0;
desc.image_slice_pitch= 0;
desc.buffer = NULL;
desc.num_mip_levels = 0;
desc.num_samples = 0;
sumTex = clCreateImage(sum.clCxt->impl->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &format, &desc, cpu_img.data, &err);
texture = clCreateImage(Context::getContext()->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
#else
sumTex = clCreateImage2D(
sum.clCxt->impl->clContext,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
texture = clCreateImage2D(
Context::getContext()->impl->clContext,
CL_MEM_READ_WRITE,
&format,
cpu_img.cols,
cpu_img.rows,
cpu_img.step,
cpu_img.data,
&err);
#endif
openCLSafeCall(err);
}
void SURF_OCL_Invoker::bindMaskSumTex(const oclMat& maskSum)
{
Mat cpu_img(maskSum); // time consuming
cl_image_format format;
int err;
format.image_channel_data_type = CL_UNSIGNED_INT32;
format.image_channel_order = CL_R;
if(maskSumTex)
{
openCLFree(maskSumTex);
}
#if CL_VERSION_1_2
cl_image_desc desc;
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
desc.image_width = cpu_img.cols;
desc.image_height = cpu_img.rows;
desc.image_depth = NULL;
desc.image_array_size = 1;
desc.image_row_pitch = cpu_img.step;
desc.image_slice_pitch= 0;
desc.buffer = NULL;
desc.num_mip_levels = 0;
desc.num_samples = 0;
maskSumTex = clCreateImage(maskSum.clCxt->impl->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &format, &desc, cpu_img.data, &err);
#else
maskSumTex = clCreateImage2D(
maskSum.clCxt->impl->clContext,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
&format,
cpu_img.cols,
cpu_img.rows,
cpu_img.step,
cpu_img.data,
img.step / img.elemSize(),
img.rows,
0,
NULL,
&err);
#endif
size_t origin[] = { 0, 0, 0 };
size_t region[] = { img.step/img.elemSize(), img.rows, 1 };
clEnqueueCopyBufferToImage(img.clCxt->impl->clCmdQueue, (cl_mem)img.data, texture, 0, origin, region, 0, NULL, 0);
openCLSafeCall(err);
}
@ -676,7 +605,7 @@ void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat& det, const oclMa
args.push_back( make_pair( sizeof(cl_int), (void *)&maxFeatures));
size_t localThreads[3] = {3, 3, 3};
size_t globalThreads[3] = {maxCounter * localThreads[0], 1, 1};
size_t globalThreads[3] = {maxCounter * localThreads[0], localThreads[1], 1};
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
}

View File

@ -44,7 +44,6 @@
#include "precomp.hpp"
#define PERF_TEST 0
////////////////////////////////////////////////////////////////////////////////
// MatchTemplate