added ROI support to ocl::columnSum
This commit is contained in:
@@ -52,25 +52,24 @@ using namespace cv::ocl;
|
||||
void cv::ocl::columnSum(const oclMat &src, oclMat &dst)
|
||||
{
|
||||
CV_Assert(src.type() == CV_32FC1);
|
||||
|
||||
dst.create(src.size(), src.type());
|
||||
|
||||
Context *clCxt = src.clCxt;
|
||||
|
||||
const std::string kernelName = "columnSum";
|
||||
int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
|
||||
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
|
||||
|
||||
std::vector< pair<size_t, const void *> > args;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src_step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset));
|
||||
|
||||
size_t globalThreads[3] = {dst.cols, 1, 1};
|
||||
size_t localThreads[3] = {256, 1, 1};
|
||||
|
||||
openCLExecuteKernel(clCxt, &imgproc_columnsum, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
|
||||
openCLExecuteKernel(src.clCxt, &imgproc_columnsum, "columnSum", globalThreads, localThreads, args, src.oclchannels(), src.depth());
|
||||
|
||||
}
|
||||
|
||||
@@ -43,38 +43,28 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||
#if defined (__ATI__)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
|
||||
#elif defined (__NVIDIA__)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
///////////////////////// columnSum ////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////
|
||||
/// CV_32FC1
|
||||
// Running column sum for single-channel float data.
//
// Each work-item whose global id x is below `cols` walks one column from the
// first row to the last and writes, for every row, the sum of all source
// values in that column up to and including that row.
//
//   src, dst                 - input / output float buffers
//   cols, rows               - number of columns / rows to process
//   src_step, dst_step       - distance between consecutive rows, in float
//                              elements (they are added directly to a float index)
//   src_offset, dst_offset   - index of the first element, in float elements
//                              (lets the buffers start at a non-zero position)
__kernel void columnSum_C1_D5(__global float * src, __global float * dst,
                              int cols, int rows, int src_step, int dst_step, int src_offset, int dst_offset)
{
    const int x = get_global_id(0);

    // Work-items past the last column have nothing to do.
    if (x < cols)
    {
        int srcIdx = x + src_offset;
        int dstIdx = x + dst_offset;

        float sum = 0;

        for (int y = 0; y < rows; ++y)
        {
            sum += src[srcIdx];
            dst[dstIdx] = sum;

            // Advance one row in each buffer.
            srcIdx += src_step;
            dstIdx += dst_step;
        }
    }
}
|
||||
|
||||
Reference in New Issue
Block a user