added ROI support to ocl::convolve
This commit is contained in:
parent
110a92c126
commit
36342eb425
@ -1585,30 +1585,31 @@ static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, st
|
||||
{
|
||||
dst.create(src.size(), src.type());
|
||||
|
||||
int channels = dst.oclchannels(), depth = dst.depth();
|
||||
|
||||
size_t vector_length = 1;
|
||||
int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1);
|
||||
int cols = divUp(dst.cols * channels + offset_cols, vector_length);
|
||||
int rows = dst.rows;
|
||||
|
||||
size_t localThreads[3] = { 16, 16, 1 };
|
||||
size_t globalThreads[3] = { cols, rows, 1 };
|
||||
size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
|
||||
|
||||
int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
|
||||
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
|
||||
int temp1_step = temp1.step / temp1.elemSize(), temp1_offset = temp1.offset / temp1.elemSize();
|
||||
|
||||
vector<pair<size_t , const void *> > args;
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&temp1.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.step ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src_step ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&temp1_step ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.rows ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.cols ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&temp1_offset ));
|
||||
|
||||
openCLExecuteKernel(src.clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth);
|
||||
openCLExecuteKernel(src.clCxt, source, kernelName, globalThreads, localThreads, args, -1, dst.depth());
|
||||
}
|
||||
|
||||
void cv::ocl::convolve(const oclMat &x, const oclMat &t, oclMat &y)
|
||||
{
|
||||
CV_Assert(x.depth() == CV_32F && t.depth() == CV_32F);
|
||||
|
@ -48,9 +48,12 @@
|
||||
#elif defined (__NVIDIA__)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
|
||||
/************************************** convolve **************************************/
|
||||
__kernel void convolve_D5 (__global float *src, __global float *temp1, __global float *dst,
|
||||
int rows, int cols, int src_step, int dst_step,int k_step, int kWidth, int kHeight)
|
||||
|
||||
__kernel void convolve_D5(__global float *src, __global float *temp1, __global float *dst,
|
||||
int rows, int cols, int src_step, int dst_step,int k_step, int kWidth, int kHeight,
|
||||
int src_offset, int dst_offset, int koffset)
|
||||
{
|
||||
__local float smem[16 + 2 * 8][16 + 2 * 8];
|
||||
|
||||
@ -65,7 +68,7 @@ __kernel void convolve_D5 (__global float *src, __global float *temp1, __global
|
||||
// 0 | 0 0 | 0
|
||||
// -----------
|
||||
// 0 | 0 0 | 0
|
||||
smem[y][x] = src[min(max(gy - 8, 0), rows - 1)*(src_step >> 2) + min(max(gx - 8, 0), cols - 1)];
|
||||
smem[y][x] = src[min(max(gy - 8, 0), rows - 1) * src_step + min(max(gx - 8, 0), cols - 1) + src_offset];
|
||||
|
||||
// 0 | 0 x | x
|
||||
// -----------
|
||||
@ -73,7 +76,7 @@ __kernel void convolve_D5 (__global float *src, __global float *temp1, __global
|
||||
// 0 | 0 0 | 0
|
||||
// -----------
|
||||
// 0 | 0 0 | 0
|
||||
smem[y][x + 16] = src[min(max(gy - 8, 0), rows - 1)*(src_step >> 2) + min(gx + 8, cols - 1)];
|
||||
smem[y][x + 16] = src[min(max(gy - 8, 0), rows - 1) * src_step + min(gx + 8, cols - 1) + src_offset];
|
||||
|
||||
// 0 | 0 0 | 0
|
||||
// -----------
|
||||
@ -81,7 +84,7 @@ __kernel void convolve_D5 (__global float *src, __global float *temp1, __global
|
||||
// x | x 0 | 0
|
||||
// -----------
|
||||
// x | x 0 | 0
|
||||
smem[y + 16][x] = src[min(gy + 8, rows - 1)*(src_step >> 2) + min(max(gx - 8, 0), cols - 1)];
|
||||
smem[y + 16][x] = src[min(gy + 8, rows - 1) * src_step + min(max(gx - 8, 0), cols - 1) + src_offset];
|
||||
|
||||
// 0 | 0 0 | 0
|
||||
// -----------
|
||||
@ -89,21 +92,18 @@ __kernel void convolve_D5 (__global float *src, __global float *temp1, __global
|
||||
// 0 | 0 x | x
|
||||
// -----------
|
||||
// 0 | 0 x | x
|
||||
smem[y + 16][x + 16] = src[min(gy + 8, rows - 1)*(src_step >> 2) + min(gx + 8, cols - 1)];
|
||||
smem[y + 16][x + 16] = src[min(gy + 8, rows - 1) * src_step + min(gx + 8, cols - 1) + src_offset];
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (gx < cols && gy < rows)
|
||||
{
|
||||
float res = 0;
|
||||
float res = 0;
|
||||
|
||||
for (int i = 0; i < kHeight; ++i)
|
||||
{
|
||||
for (int j = 0; j < kWidth; ++j)
|
||||
{
|
||||
res += smem[y + 8 - kHeight / 2 + i][x + 8 - kWidth / 2 + j] * temp1[i * (k_step>>2) + j];
|
||||
}
|
||||
}
|
||||
dst[gy*(dst_step >> 2)+gx] = res;
|
||||
}
|
||||
res += smem[y + 8 - kHeight / 2 + i][x + 8 - kWidth / 2 + j] * temp1[i * k_step + j + koffset];
|
||||
|
||||
dst[gy * dst_step + gx + dst_offset] = res;
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user