optimized cv::copyMakeBorder

This commit is contained in:
Ilya Lavrenov 2014-05-27 16:12:52 +04:00
parent ab2749d648
commit ab428c9dbd
2 changed files with 47 additions and 59 deletions

View File

@ -989,7 +989,8 @@ namespace cv {
static bool ocl_copyMakeBorder( InputArray _src, OutputArray _dst, int top, int bottom,
int left, int right, int borderType, const Scalar& value )
{
int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type),
rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
bool isolated = (borderType & BORDER_ISOLATED) != 0;
borderType &= ~cv::BORDER_ISOLATED;
@ -1001,12 +1002,10 @@ static bool ocl_copyMakeBorder( InputArray _src, OutputArray _dst, int top, int
const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101" };
int scalarcn = cn == 3 ? 4 : cn;
int sctype = CV_MAKETYPE(depth, scalarcn);
String buildOptions = format(
"-D T=%s -D %s "
"-D T1=%s -D cn=%d -D ST=%s",
ocl::memopTypeToStr(type), borderMap[borderType],
ocl::memopTypeToStr(depth), cn, ocl::memopTypeToStr(sctype)
);
String buildOptions = format("-D T=%s -D %s -D T1=%s -D cn=%d -D ST=%s -D rowsPerWI=%d",
ocl::memopTypeToStr(type), borderMap[borderType],
ocl::memopTypeToStr(depth), cn,
ocl::memopTypeToStr(sctype), rowsPerWI);
ocl::Kernel k("copyMakeBorder", ocl::core::copymakeborder_oclsrc, buildOptions);
if (k.empty())
@ -1042,7 +1041,7 @@ static bool ocl_copyMakeBorder( InputArray _src, OutputArray _dst, int top, int
k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst),
top, left, ocl::KernelArg::Constant(Mat(1, 1, sctype, value)));
size_t globalsize[2] = { dst.cols, dst.rows };
size_t globalsize[2] = { dst.cols, (dst.rows + rowsPerWI - 1) / rowsPerWI };
return k.run(2, globalsize, NULL, false);
}

View File

@ -55,27 +55,18 @@
#endif
#ifdef BORDER_CONSTANT
#define EXTRAPOLATE(x, y, v) v = scalar;
#define EXTRAPOLATE(x, cols) \
;
#elif defined BORDER_REPLICATE
#define EXTRAPOLATE(x, y, v) \
{ \
x = clamp(x, 0, src_cols - 1); \
y = clamp(y, 0, src_rows - 1); \
v = loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))); \
}
#define EXTRAPOLATE(x, cols) \
x = clamp(x, 0, cols - 1);
#elif defined BORDER_WRAP
#define EXTRAPOLATE(x, y, v) \
#define EXTRAPOLATE(x, cols) \
{ \
if (x < 0) \
x -= ((x - src_cols + 1) / src_cols) * src_cols; \
if (x >= src_cols) \
x %= src_cols; \
\
if (y < 0) \
y -= ((y - src_rows + 1) / src_rows) * src_rows; \
if( y >= src_rows ) \
y %= src_rows; \
v = loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))); \
x -= ((x - cols + 1) / cols) * cols; \
if (x >= cols) \
x %= cols; \
}
#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101)
#ifdef BORDER_REFLECT
@ -83,10 +74,10 @@
#else
#define DELTA int delta = 1
#endif
#define EXTRAPOLATE(x, y, v) \
#define EXTRAPOLATE(x, cols) \
{ \
DELTA; \
if (src_cols == 1) \
if (cols == 1) \
x = 0; \
else \
do \
@ -94,58 +85,56 @@
if( x < 0 ) \
x = -x - 1 + delta; \
else \
x = src_cols - 1 - (x - src_cols) - delta; \
x = cols - 1 - (x - cols) - delta; \
} \
while (x >= src_cols || x < 0); \
\
if (src_rows == 1) \
y = 0; \
else \
do \
{ \
if( y < 0 ) \
y = -y - 1 + delta; \
else \
y = src_rows - 1 - (y - src_rows) - delta; \
} \
while (y >= src_rows || y < 0); \
v = loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))); \
while (x >= cols || x < 0); \
}
#else
#error No extrapolation method
#error "No extrapolation method"
#endif
// Two-coordinate form: true when (gx, gy) falls outside the source image in either
// dimension (reads src_cols / src_rows from the enclosing kernel scope).
// NOTE(review): appears to be the pre-change line of this diff hunk; the one-coordinate
// form below looks like its replacement.
#define NEED_EXTRAPOLATION(gx, gy) (gx >= src_cols || gy >= src_rows || gx < 0 || gy < 0)
// One-coordinate form: true when x lies outside the half-open range [0, cols).
#define NEED_EXTRAPOLATION(x, cols) (x >= cols || x < 0)
// copyMakeBorder kernel: for destination column x, stores (via storepix) either a pixel
// loaded from the source at (x - left, y - top) or a value chosen by the border rule
// selected at build time through one of the BORDER_* defines.
//
// NOTE(review): this listing is a diff hunk whose +/- markers were stripped, so removed
// and added lines appear back to back (two `src_x` declarations, two `dst_index`
// declarations, two bounds checks). Read it as before/after pairs, not as one compilable
// body. The "pre-change"/"post-change" labels below are inferred from the rowsPerWI
// lines and should be confirmed against the real diff.
//
// Parameters:
//   srcptr, src_step, src_offset, src_rows, src_cols - source buffer (byte pointer), row
//       step, starting offset, and dimensions.
//   dstptr, dst_step, dst_offset, dst_rows, dst_cols - same for the destination buffer.
//   top, left - shift between destination and source coordinates (src = dst - top/left).
//   nVal      - border value; only read when BORDER_CONSTANT is defined.
__kernel void copyMakeBorder(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
__global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
int top, int left, ST nVal)
{
int x = get_global_id(0);
// Pre-change: one destination row per work-item.
int y = get_global_id(1);
// Post-change: y0 is the first of up to rowsPerWI consecutive destination rows handled
// by this work-item (rowsPerWI is a build-time define).
int y0 = get_global_id(1) * rowsPerWI;
#ifdef BORDER_CONSTANT
// Border value converted once into the pixel type T (convertScalar is defined outside
// this view).
T scalar = convertScalar(nVal);
#endif
// Pre-change check tests column and row; post-change check tests the column only,
// because the row bound is enforced by the loops' min(y0 + rowsPerWI, dst_rows) limit.
if (x < dst_cols && y < dst_rows)
if (x < dst_cols)
{
// Pre-change: both source coordinates computed up front.
int src_x = x - left;
int src_y = y - top;
// Post-change: src_y is only declared here and assigned per row inside the loop.
int src_x = x - left, src_y;
// Offset of the destination pixel from dstptr (a uchar pointer): row * dst_step +
// x * TSIZE + dst_offset. First line uses y0 (post-change), second uses y (pre-change).
int dst_index = mad24(y0, dst_step, mad24(x, (int)TSIZE, dst_offset));
int dst_index = mad24(y, dst_step, mad24(x, (int)TSIZE, dst_offset));
// Pre-change: typed destination pointer used by the final storepix(v, dst).
__global T * dst = (__global T *)(dstptr + dst_index);
// Post-change column handling: the column is resolved once, outside the row loop.
if (NEED_EXTRAPOLATION(src_x, src_cols))
{
#ifdef BORDER_CONSTANT
// Column is outside the source: every row of this work-item gets the constant border
// value, so fill them and finish without reading the source.
for (int y = y0, y1 = min(y0 + rowsPerWI, dst_rows); y < y1; ++y, dst_index += dst_step)
storepix(scalar, dstptr + dst_index);
return;
#endif
// Other border modes: EXTRAPOLATE rewrites src_x in place.
EXTRAPOLATE(src_x, src_cols)
}
// From here src_x is no longer a column index but src_x * TSIZE + src_offset, i.e. the
// column's contribution to the source offset (TSIZE is defined outside this view;
// presumably the pixel size in bytes - confirm).
src_x = mad24(src_x, TSIZE, src_offset);
// Pre-change: temporary pixel and the combined two-coordinate test.
T v;
if (NEED_EXTRAPOLATION(src_x, src_y))
// Post-change: walk the rows owned by this work-item, advancing dst_index by one
// destination row step per iteration.
for (int y = y0, y1 = min(y0 + rowsPerWI, dst_rows); y < y1; ++y, dst_index += dst_step)
{
// Pre-change: three-argument EXTRAPOLATE that also produced the pixel value v.
EXTRAPOLATE(src_x, src_y, v)
src_y = y - top;
if (NEED_EXTRAPOLATION(src_y, src_rows))
{
// Rewrites src_y in place for the non-constant modes; the two-argument BORDER_CONSTANT
// definition earlier in this file expands to an empty statement.
EXTRAPOLATE(src_y, src_rows)
#ifdef BORDER_CONSTANT
// Row is outside the source: write the border value and move on to the next row.
storepix(scalar, dstptr + dst_index);
continue;
#endif
}
// src_x already carries the column and base offset, so only the row term is added here.
int src_index = mad24(src_y, src_step, src_x);
storepix(loadpix(srcptr + src_index), dstptr + dst_index);
}
// Pre-change: in-range branch that computed the full source offset and loaded the pixel,
// followed by the single store of v.
else
{
int src_index = mad24(src_y, src_step, mad24(src_x, TSIZE, src_offset));
v = loadpix(srcptr + src_index);
}
storepix(v, dst);
}
}