Optimized cv::copyMakeBorder (OpenCL path): each work-item now processes rowsPerWI rows (4 on Intel devices, 1 otherwise)
This commit is contained in:
parent
ab2749d648
commit
ab428c9dbd
@ -989,7 +989,8 @@ namespace cv {
|
||||
static bool ocl_copyMakeBorder( InputArray _src, OutputArray _dst, int top, int bottom,
|
||||
int left, int right, int borderType, const Scalar& value )
|
||||
{
|
||||
int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
|
||||
int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type),
|
||||
rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
|
||||
bool isolated = (borderType & BORDER_ISOLATED) != 0;
|
||||
borderType &= ~cv::BORDER_ISOLATED;
|
||||
|
||||
@ -1001,12 +1002,10 @@ static bool ocl_copyMakeBorder( InputArray _src, OutputArray _dst, int top, int
|
||||
const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101" };
|
||||
int scalarcn = cn == 3 ? 4 : cn;
|
||||
int sctype = CV_MAKETYPE(depth, scalarcn);
|
||||
String buildOptions = format(
|
||||
"-D T=%s -D %s "
|
||||
"-D T1=%s -D cn=%d -D ST=%s",
|
||||
ocl::memopTypeToStr(type), borderMap[borderType],
|
||||
ocl::memopTypeToStr(depth), cn, ocl::memopTypeToStr(sctype)
|
||||
);
|
||||
String buildOptions = format("-D T=%s -D %s -D T1=%s -D cn=%d -D ST=%s -D rowsPerWI=%d",
|
||||
ocl::memopTypeToStr(type), borderMap[borderType],
|
||||
ocl::memopTypeToStr(depth), cn,
|
||||
ocl::memopTypeToStr(sctype), rowsPerWI);
|
||||
|
||||
ocl::Kernel k("copyMakeBorder", ocl::core::copymakeborder_oclsrc, buildOptions);
|
||||
if (k.empty())
|
||||
@ -1042,7 +1041,7 @@ static bool ocl_copyMakeBorder( InputArray _src, OutputArray _dst, int top, int
|
||||
k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst),
|
||||
top, left, ocl::KernelArg::Constant(Mat(1, 1, sctype, value)));
|
||||
|
||||
size_t globalsize[2] = { dst.cols, dst.rows };
|
||||
size_t globalsize[2] = { dst.cols, (dst.rows + rowsPerWI - 1) / rowsPerWI };
|
||||
return k.run(2, globalsize, NULL, false);
|
||||
}
|
||||
|
||||
|
@ -55,27 +55,18 @@
|
||||
#endif
|
||||
|
||||
#ifdef BORDER_CONSTANT
|
||||
#define EXTRAPOLATE(x, y, v) v = scalar;
|
||||
#define EXTRAPOLATE(x, cols) \
|
||||
;
|
||||
#elif defined BORDER_REPLICATE
|
||||
#define EXTRAPOLATE(x, y, v) \
|
||||
{ \
|
||||
x = clamp(x, 0, src_cols - 1); \
|
||||
y = clamp(y, 0, src_rows - 1); \
|
||||
v = loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))); \
|
||||
}
|
||||
#define EXTRAPOLATE(x, cols) \
|
||||
x = clamp(x, 0, cols - 1);
|
||||
#elif defined BORDER_WRAP
|
||||
#define EXTRAPOLATE(x, y, v) \
|
||||
#define EXTRAPOLATE(x, cols) \
|
||||
{ \
|
||||
if (x < 0) \
|
||||
x -= ((x - src_cols + 1) / src_cols) * src_cols; \
|
||||
if (x >= src_cols) \
|
||||
x %= src_cols; \
|
||||
\
|
||||
if (y < 0) \
|
||||
y -= ((y - src_rows + 1) / src_rows) * src_rows; \
|
||||
if( y >= src_rows ) \
|
||||
y %= src_rows; \
|
||||
v = loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))); \
|
||||
x -= ((x - cols + 1) / cols) * cols; \
|
||||
if (x >= cols) \
|
||||
x %= cols; \
|
||||
}
|
||||
#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101)
|
||||
#ifdef BORDER_REFLECT
|
||||
@ -83,10 +74,10 @@
|
||||
#else
|
||||
#define DELTA int delta = 1
|
||||
#endif
|
||||
#define EXTRAPOLATE(x, y, v) \
|
||||
#define EXTRAPOLATE(x, cols) \
|
||||
{ \
|
||||
DELTA; \
|
||||
if (src_cols == 1) \
|
||||
if (cols == 1) \
|
||||
x = 0; \
|
||||
else \
|
||||
do \
|
||||
@ -94,58 +85,56 @@
|
||||
if( x < 0 ) \
|
||||
x = -x - 1 + delta; \
|
||||
else \
|
||||
x = src_cols - 1 - (x - src_cols) - delta; \
|
||||
x = cols - 1 - (x - cols) - delta; \
|
||||
} \
|
||||
while (x >= src_cols || x < 0); \
|
||||
\
|
||||
if (src_rows == 1) \
|
||||
y = 0; \
|
||||
else \
|
||||
do \
|
||||
{ \
|
||||
if( y < 0 ) \
|
||||
y = -y - 1 + delta; \
|
||||
else \
|
||||
y = src_rows - 1 - (y - src_rows) - delta; \
|
||||
} \
|
||||
while (y >= src_rows || y < 0); \
|
||||
v = loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))); \
|
||||
while (x >= cols || x < 0); \
|
||||
}
|
||||
#else
|
||||
#error No extrapolation method
|
||||
#error "No extrapolation method"
|
||||
#endif
|
||||
|
||||
#define NEED_EXTRAPOLATION(gx, gy) (gx >= src_cols || gy >= src_rows || gx < 0 || gy < 0)
|
||||
#define NEED_EXTRAPOLATION(x, cols) (x >= cols || x < 0)
|
||||
|
||||
__kernel void copyMakeBorder(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
|
||||
__global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
|
||||
int top, int left, ST nVal)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
int y0 = get_global_id(1) * rowsPerWI;
|
||||
|
||||
#ifdef BORDER_CONSTANT
|
||||
T scalar = convertScalar(nVal);
|
||||
#endif
|
||||
|
||||
if (x < dst_cols && y < dst_rows)
|
||||
if (x < dst_cols)
|
||||
{
|
||||
int src_x = x - left;
|
||||
int src_y = y - top;
|
||||
int src_x = x - left, src_y;
|
||||
int dst_index = mad24(y0, dst_step, mad24(x, (int)TSIZE, dst_offset));
|
||||
|
||||
int dst_index = mad24(y, dst_step, mad24(x, (int)TSIZE, dst_offset));
|
||||
__global T * dst = (__global T *)(dstptr + dst_index);
|
||||
if (NEED_EXTRAPOLATION(src_x, src_cols))
|
||||
{
|
||||
#ifdef BORDER_CONSTANT
|
||||
for (int y = y0, y1 = min(y0 + rowsPerWI, dst_rows); y < y1; ++y, dst_index += dst_step)
|
||||
storepix(scalar, dstptr + dst_index);
|
||||
return;
|
||||
#endif
|
||||
EXTRAPOLATE(src_x, src_cols)
|
||||
}
|
||||
src_x = mad24(src_x, TSIZE, src_offset);
|
||||
|
||||
T v;
|
||||
if (NEED_EXTRAPOLATION(src_x, src_y))
|
||||
for (int y = y0, y1 = min(y0 + rowsPerWI, dst_rows); y < y1; ++y, dst_index += dst_step)
|
||||
{
|
||||
EXTRAPOLATE(src_x, src_y, v)
|
||||
src_y = y - top;
|
||||
if (NEED_EXTRAPOLATION(src_y, src_rows))
|
||||
{
|
||||
EXTRAPOLATE(src_y, src_rows)
|
||||
#ifdef BORDER_CONSTANT
|
||||
storepix(scalar, dstptr + dst_index);
|
||||
continue;
|
||||
#endif
|
||||
}
|
||||
int src_index = mad24(src_y, src_step, src_x);
|
||||
storepix(loadpix(srcptr + src_index), dstptr + dst_index);
|
||||
}
|
||||
else
|
||||
{
|
||||
int src_index = mad24(src_y, src_step, mad24(src_x, TSIZE, src_offset));
|
||||
v = loadpix(srcptr + src_index);
|
||||
}
|
||||
storepix(v, dst);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user