From 5864895ec6517bca45644988e5e225d37418cecb Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 23 Oct 2013 17:20:10 +0400 Subject: [PATCH] fixed ocl::copyMakeBorder --- modules/ocl/src/imgproc.cpp | 41 +--- .../ocl/src/opencl/imgproc_copymakeboder.cl | 225 ++++++------------ modules/ocl/test/test_imgproc.cpp | 2 +- 3 files changed, 90 insertions(+), 178 deletions(-) diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp index 10b680486..0a2cf3f8d 100644 --- a/modules/ocl/src/imgproc.cpp +++ b/modules/ocl/src/imgproc.cpp @@ -436,7 +436,7 @@ namespace cv CV_Assert(top >= 0 && bottom >= 0 && left >= 0 && right >= 0); - if( _src.offset != 0 && (bordertype & BORDER_ISOLATED) == 0 ) + if( (_src.wholecols != _src.cols || _src.wholerows != _src.rows) && (bordertype & BORDER_ISOLATED) == 0 ) { Size wholeSize; Point ofs; @@ -453,34 +453,25 @@ namespace cv } bordertype &= ~cv::BORDER_ISOLATED; - // TODO need to remove this conditions and fix the code - if (bordertype == cv::BORDER_REFLECT || bordertype == cv::BORDER_WRAP) - { - CV_Assert((_src.cols >= left) && (_src.cols >= right) && (_src.rows >= top) && (_src.rows >= bottom)); - } - else if (bordertype == cv::BORDER_REFLECT_101) - { - CV_Assert((_src.cols > left) && (_src.cols > right) && (_src.rows > top) && (_src.rows > bottom)); - } - dst.create(_src.rows + top + bottom, _src.cols + left + right, _src.type()); - int srcStep = _src.step1() / _src.oclchannels(), dstStep = dst.step1() / dst.oclchannels(); + int srcStep = _src.step / _src.elemSize(), dstStep = dst.step / dst.elemSize(); int srcOffset = _src.offset / _src.elemSize(), dstOffset = dst.offset / dst.elemSize(); int depth = _src.depth(), ochannels = _src.oclchannels(); - int __bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, BORDER_REFLECT, BORDER_WRAP, BORDER_REFLECT_101}; - const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"}; - size_t bordertype_index; + int __bordertype[] = { BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_WRAP, BORDER_REFLECT_101 }; + const char *borderstr[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101" }; - for(bordertype_index = 0; bordertype_index < sizeof(__bordertype) / sizeof(int); bordertype_index++) - if (__bordertype[bordertype_index] == bordertype) + int bordertype_index = -1; + for (int i = 0, end = sizeof(__bordertype) / sizeof(int); i < end; i++) + if (__bordertype[i] == bordertype) + { + bordertype_index = i; break; - - if (bordertype_index == sizeof(__bordertype) / sizeof(int)) + } + if (bordertype_index < 0) CV_Error(CV_StsBadArg, "Unsupported border type"); - string kernelName = "copymakeborder"; - size_t localThreads[3] = {16, 16, 1}; + size_t localThreads[3] = { 16, 16, 1 }; size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; vector< pair > args; @@ -503,12 +494,6 @@ namespace cv typeMap[depth], channelMap[ochannels], borderstr[bordertype_index]); - if (src.type() == CV_8UC1 && (dst.offset & 3) == 0 && (dst.cols & 3) == 0) - { - kernelName = "copymakeborder_C1_D0"; - globalThreads[0] = dst.cols >> 2; - } - int cn = src.channels(), ocn = src.oclchannels(); int bufSize = src.elemSize1() * ocn; AutoBuffer _buf(bufSize); @@ -518,7 +503,7 @@ namespace cv args.push_back( make_pair( bufSize , (void *)buf )); - openCLExecuteKernel(src.clCxt, &imgproc_copymakeboder, kernelName, globalThreads, + openCLExecuteKernel(src.clCxt, &imgproc_copymakeboder, "copymakeborder", globalThreads, localThreads, args, -1, -1, buildOptions.c_str()); } diff --git a/modules/ocl/src/opencl/imgproc_copymakeboder.cl b/modules/ocl/src/opencl/imgproc_copymakeboder.cl index ff7509ffd..b1686842e 100644 --- a/modules/ocl/src/opencl/imgproc_copymakeboder.cl +++ b/modules/ocl/src/opencl/imgproc_copymakeboder.cl @@ -35,173 +35,100 @@ // #if defined (DOUBLE_SUPPORT) -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64:enable -#elif defined (cl_amd_fp64) +#ifdef cl_amd_fp64 #pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable #endif #endif #ifdef BORDER_CONSTANT -//BORDER_CONSTANT: iiiiii|abcdefgh|iiiiiii -#define ELEM(i,l_edge,r_edge,elem1,elem2) (i)<(l_edge) | (i) >= (r_edge) ? (elem1) : (elem2) -#endif - -#ifdef BORDER_REPLICATE -//BORDER_REPLICATE: aaaaaa|abcdefgh|hhhhhhh -#define ADDR_L(i,l_edge,r_edge,addr) (i) < (l_edge) ? (l_edge) : (addr) -#define ADDR_R(i,r_edge,addr) (i) >= (r_edge) ? (r_edge)-1 : (addr) -#endif - +#define EXTRAPOLATE(x, y, v) v = scalar; +#elif defined BORDER_REPLICATE +#define EXTRAPOLATE(x, y, v) \ + { \ + x = max(min(x, src_cols - 1), 0); \ + y = max(min(y, src_rows - 1), 0); \ + v = src[mad24(y, src_step, x + src_offset)]; \ + } +#elif defined BORDER_WRAP +#define EXTRAPOLATE(x, y, v) \ + { \ + if (x < 0) \ + x -= ((x - src_cols + 1) / src_cols) * src_cols; \ + if (x >= src_cols) \ + x %= src_cols; \ + \ + if (y < 0) \ + y -= ((y - src_rows + 1) / src_rows) * src_rows; \ + if( y >= src_rows ) \ + y %= src_rows; \ + v = src[mad24(y, src_step, x + src_offset)]; \ + } +#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101) #ifdef BORDER_REFLECT -//BORDER_REFLECT: fedcba|abcdefgh|hgfedcb -#define ADDR_L(i,l_edge,r_edge,addr) (i) < (l_edge) ? -(i)-1 : (addr) -#define ADDR_R(i,r_edge,addr) (i) >= (r_edge) ? -(i)-1+((r_edge)<<1) : (addr) +#define DELTA int delta = 0 +#else +#define DELTA int delta = 1 +#endif +#define EXTRAPOLATE(x, y, v) \ + { \ + DELTA; \ + if (src_cols == 1) \ + x = 0; \ + else \ + do \ + { \ + if( x < 0 ) \ + x = -x - 1 + delta; \ + else \ + x = src_cols - 1 - (x - src_cols) - delta; \ + } \ + while (x >= src_cols || x < 0); \ + \ + if (src_rows == 1) \ + y = 0; \ + else \ + do \ + { \ + if( y < 0 ) \ + y = -y - 1 + delta; \ + else \ + y = src_rows - 1 - (y - src_rows) - delta; \ + } \ + while (y >= src_rows || y < 0); \ + v = src[mad24(y, src_step, x + src_offset)]; \ + } +#else +#error No extrapolation method #endif -#ifdef BORDER_REFLECT_101 -//BORDER_REFLECT_101: gfedcb|abcdefgh|gfedcba -#define ADDR_L(i,l_edge,r_edge,addr) (i) < (l_edge) ? -(i) : (addr) -#define ADDR_R(i,r_edge,addr) (i) >= (r_edge) ? -(i)-2+((r_edge)<<1) : (addr) -#endif - -#ifdef BORDER_WRAP -//BORDER_WRAP: cdefgh|abcdefgh|abcdefg -#define ADDR_L(i,l_edge,r_edge,addr) (i) < (l_edge) ? (i)+(r_edge) : (addr) -#define ADDR_R(i,r_edge,addr) (i) >= (r_edge) ? (i)-(r_edge) : (addr) -#endif +#define NEED_EXTRAPOLATION(gx, gy) (gx >= src_cols || gy >= src_rows || gx < 0 || gy < 0) __kernel void copymakeborder (__global const GENTYPE *src, __global GENTYPE *dst, - const int dst_cols, - const int dst_rows, - const int src_cols, - const int src_rows, - const int src_step_in_pixel, - const int src_offset_in_pixel, - const int dst_step_in_pixel, - const int dst_offset_in_pixel, - const int top, - const int left, - const GENTYPE val - ) + int dst_cols, int dst_rows, + int src_cols, int src_rows, + int src_step, int src_offset, + int dst_step, int dst_offset, + int top, int left, GENTYPE scalar) { int x = get_global_id(0); int y = get_global_id(1); - int src_x = x-left; - int src_y = y-top; - int src_addr = mad24(src_y,src_step_in_pixel,src_x+src_offset_in_pixel); - int dst_addr = mad24(y,dst_step_in_pixel,x+dst_offset_in_pixel); - int con = (src_x >= 0) && (src_x < src_cols) && (src_y >= 0) && (src_y < src_rows); - if(con) - { - dst[dst_addr] = src[src_addr]; - } - else - { - #ifdef BORDER_CONSTANT - //write the result to dst - if((x= 0) && (src_x+3 < src_cols) && (src_y >= 0) && (src_y < src_rows); - if(con) + if (x < dst_cols && y < dst_rows) { - uchar4 tmp = vload4(0,src+src_addr); - *(__global uchar4*)(dst+dst_addr) = tmp; - } - else - { - #ifdef BORDER_CONSTANT - //write the result to dst - if((((src_x<0) && (src_x+3>=0))||(src_x < src_cols) && (src_x+3 >= src_cols)) && (src_y >= 0) && (src_y < src_rows)) + int src_x = x - left; + int src_y = y - top; + int dst_index = mad24(y, dst_step, x + dst_offset); + + if (NEED_EXTRAPOLATION(src_x, src_y)) + EXTRAPOLATE(src_x, src_y, dst[dst_index]) + else { - int4 addr; - uchar4 tmp; - addr.x = ((src_x < 0) || (src_x>= src_cols)) ? 0 : src_addr; - addr.y = ((src_x+1 < 0) || (src_x+1>= src_cols)) ? 0 : (src_addr+1); - addr.z = ((src_x+2 < 0) || (src_x+2>= src_cols)) ? 0 : (src_addr+2); - addr.w = ((src_x+3 < 0) || (src_x+3>= src_cols)) ? 0 : (src_addr+3); - tmp.x = src[addr.x]; - tmp.y = src[addr.y]; - tmp.z = src[addr.z]; - tmp.w = src[addr.w]; - tmp.x = (src_x >=0)&&(src_x < src_cols) ? tmp.x : val; - tmp.y = (src_x+1 >=0)&&(src_x +1 < src_cols) ? tmp.y : val; - tmp.z = (src_x+2 >=0)&&(src_x +2 < src_cols) ? tmp.z : val; - tmp.w = (src_x+3 >=0)&&(src_x +3 < src_cols) ? tmp.w : val; - *(__global uchar4*)(dst+dst_addr) = tmp; + int src_index = mad24(src_y, src_step, src_x + src_offset); + dst[dst_index] = src[src_index]; } - else if((x