Added new border types for pyrDown
This commit is contained in:
parent
7032a5a46c
commit
5022a0fae3
@ -51,6 +51,22 @@
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined BORDER_REPLICATE
|
||||||
|
// aaaaaa|abcdefgh|hhhhhhh
|
||||||
|
// Clamp the index into [0, maxV-1]; both arguments are parenthesized so that
// expression arguments expand with the intended precedence.
#define EXTRAPOLATE(x, maxV) clamp((x), 0, (maxV) - 1)
|
||||||
|
#elif defined BORDER_WRAP
|
||||||
|
// cdefgh|abcdefgh|abcdefg
|
||||||
|
// Wrap the index around modulo maxV. The whole expansion is parenthesized so the
// macro can be embedded in a larger expression without the surrounding operators
// binding to only part of it. Assumes x >= -maxV; a more negative x would leave
// the remainder negative.
#define EXTRAPOLATE(x, maxV) (((x) + (maxV)) % (maxV))
|
||||||
|
#elif defined BORDER_REFLECT
|
||||||
|
// fedcba|abcdefgh|hgfedcb
|
||||||
|
// Mirror the index at both ends of the range, repeating the edge element.
#define EXTRAPOLATE(x, maxV) min(2 * ((maxV) - 1) - (x) + 1, max((x), -(x) - 1))
|
||||||
|
#elif defined BORDER_REFLECT_101 || defined BORDER_REFLECT101
|
||||||
|
// gfedcb|abcdefgh|gfedcba
|
||||||
|
// Mirror the index at both ends of the range without repeating the edge element.
#define EXTRAPOLATE(x, maxV) min(2 * ((maxV) - 1) - (x), max((x), -(x)))
|
||||||
|
#else
|
||||||
|
#error No extrapolation method
|
||||||
|
#endif
|
||||||
|
|
||||||
#if cn != 3
|
#if cn != 3
|
||||||
#define loadpix(addr) *(__global const T*)(addr)
|
#define loadpix(addr) *(__global const T*)(addr)
|
||||||
#define storepix(val, addr) *(__global T*)(addr) = (val)
|
#define storepix(val, addr) *(__global T*)(addr) = (val)
|
||||||
@ -61,45 +77,17 @@
|
|||||||
#define PIXSIZE ((int)sizeof(T1)*3)
|
#define PIXSIZE ((int)sizeof(T1)*3)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Load the source pixel at column _x, row _y (byte offset _y * src_step + PIXSIZE * _x
// from srcData) and pass it through convertToFT. Both arguments are parenthesized so
// that expression arguments such as `x - 2` keep their intended precedence.
#define SRC(_x,_y) convertToFT(loadpix(srcData + mad24((_y), src_step, PIXSIZE * (_x))))
|
||||||
|
|
||||||
#define noconvert
|
#define noconvert
|
||||||
|
|
||||||
inline int idx_row_low(int y, int last_row)
|
|
||||||
{
|
|
||||||
return abs(y) % (last_row + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline int idx_row_high(int y, int last_row)
|
|
||||||
{
|
|
||||||
return abs(last_row - (int)abs(last_row - y)) % (last_row + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline int idx_row(int y, int last_row)
|
|
||||||
{
|
|
||||||
return idx_row_low(idx_row_high(y, last_row), last_row);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline int idx_col_low(int x, int last_col)
|
|
||||||
{
|
|
||||||
return abs(x) % (last_col + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline int idx_col_high(int x, int last_col)
|
|
||||||
{
|
|
||||||
return abs(last_col - (int)abs(last_col - x)) % (last_col + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline int idx_col(int x, int last_col)
|
|
||||||
{
|
|
||||||
return idx_col_low(idx_col_high(x, last_col), last_col);
|
|
||||||
}
|
|
||||||
|
|
||||||
__kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
|
__kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
|
||||||
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
|
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
|
||||||
{
|
{
|
||||||
const int x = get_global_id(0);
|
const int x = get_global_id(0);
|
||||||
const int y = get_group_id(1);
|
const int y = get_group_id(1);
|
||||||
|
|
||||||
__local FT smem[256 + 4];
|
__local FT smem[LOCAL_SIZE + 4];
|
||||||
__global uchar * dstData = dst + dst_offset;
|
__global uchar * dstData = dst + dst_offset;
|
||||||
__global const uchar * srcData = src + src_offset;
|
__global const uchar * srcData = src + src_offset;
|
||||||
|
|
||||||
@ -109,16 +97,14 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset,
|
|||||||
FT co3 = 0.0625f;
|
FT co3 = 0.0625f;
|
||||||
|
|
||||||
const int src_y = 2*y;
|
const int src_y = 2*y;
|
||||||
const int last_row = src_rows - 1;
|
|
||||||
const int last_col = src_cols - 1;
|
|
||||||
|
|
||||||
if (src_y >= 2 && src_y < src_rows - 2 && x >= 2 && x < src_cols - 2)
|
if (src_y >= 2 && src_y < src_rows - 2 && x >= 2 && x < src_cols - 2)
|
||||||
{
|
{
|
||||||
sum = co3 * convertToFT(loadpix(srcData + (src_y - 2) * src_step + x * PIXSIZE));
|
sum = co3 * SRC(x, src_y - 2);
|
||||||
sum = sum + co2 * convertToFT(loadpix(srcData + (src_y - 1) * src_step + x * PIXSIZE));
|
sum = sum + co2 * SRC(x, src_y - 1);
|
||||||
sum = sum + co1 * convertToFT(loadpix(srcData + (src_y ) * src_step + x * PIXSIZE));
|
sum = sum + co1 * SRC(x, src_y );
|
||||||
sum = sum + co2 * convertToFT(loadpix(srcData + (src_y + 1) * src_step + x * PIXSIZE));
|
sum = sum + co2 * SRC(x, src_y + 1);
|
||||||
sum = sum + co3 * convertToFT(loadpix(srcData + (src_y + 2) * src_step + x * PIXSIZE));
|
sum = sum + co3 * SRC(x, src_y + 2);
|
||||||
|
|
||||||
smem[2 + get_local_id(0)] = sum;
|
smem[2 + get_local_id(0)] = sum;
|
||||||
|
|
||||||
@ -126,66 +112,62 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset,
|
|||||||
{
|
{
|
||||||
const int left_x = x - 2;
|
const int left_x = x - 2;
|
||||||
|
|
||||||
sum = co3 * convertToFT(loadpix(srcData + (src_y - 2) * src_step + left_x * PIXSIZE));
|
sum = co3 * SRC(left_x, src_y - 2);
|
||||||
sum = sum + co2 * convertToFT(loadpix(srcData + (src_y - 1) * src_step + left_x * PIXSIZE));
|
sum = sum + co2 * SRC(left_x, src_y - 1);
|
||||||
sum = sum + co1 * convertToFT(loadpix(srcData + (src_y ) * src_step + left_x * PIXSIZE));
|
sum = sum + co1 * SRC(left_x, src_y );
|
||||||
sum = sum + co2 * convertToFT(loadpix(srcData + (src_y + 1) * src_step + left_x * PIXSIZE));
|
sum = sum + co2 * SRC(left_x, src_y + 1);
|
||||||
sum = sum + co3 * convertToFT(loadpix(srcData + (src_y + 2) * src_step + left_x * PIXSIZE));
|
sum = sum + co3 * SRC(left_x, src_y + 2);
|
||||||
|
|
||||||
smem[get_local_id(0)] = sum;
|
smem[get_local_id(0)] = sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (get_local_id(0) > 253)
|
if (get_local_id(0) > LOCAL_SIZE - 3)
|
||||||
{
|
{
|
||||||
const int right_x = x + 2;
|
const int right_x = x + 2;
|
||||||
|
|
||||||
sum = co3 * convertToFT(loadpix(srcData + (src_y - 2) * src_step + right_x * PIXSIZE));
|
sum = co3 * SRC(right_x, src_y - 2);
|
||||||
sum = sum + co2 * convertToFT(loadpix(srcData + (src_y - 1) * src_step + right_x * PIXSIZE));
|
sum = sum + co2 * SRC(right_x, src_y - 1);
|
||||||
sum = sum + co1 * convertToFT(loadpix(srcData + (src_y ) * src_step + right_x * PIXSIZE));
|
sum = sum + co1 * SRC(right_x, src_y );
|
||||||
sum = sum + co2 * convertToFT(loadpix(srcData + (src_y + 1) * src_step + right_x * PIXSIZE));
|
sum = sum + co2 * SRC(right_x, src_y + 1);
|
||||||
sum = sum + co3 * convertToFT(loadpix(srcData + (src_y + 2) * src_step + right_x * PIXSIZE));
|
sum = sum + co3 * SRC(right_x, src_y + 2);
|
||||||
|
|
||||||
smem[4 + get_local_id(0)] = sum;
|
smem[4 + get_local_id(0)] = sum;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int col = idx_col(x, last_col);
|
int col = EXTRAPOLATE(x, src_cols);
|
||||||
|
|
||||||
sum = co3 * convertToFT(loadpix(srcData + idx_row(src_y - 2, last_row) * src_step + col * PIXSIZE));
|
sum = co3 * SRC(col, EXTRAPOLATE(src_y - 2, src_rows));
|
||||||
sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y - 1, last_row) * src_step + col * PIXSIZE));
|
sum = sum + co2 * SRC(col, EXTRAPOLATE(src_y - 1, src_rows));
|
||||||
sum = sum + co1 * convertToFT(loadpix(srcData + idx_row(src_y , last_row) * src_step + col * PIXSIZE));
|
sum = sum + co1 * SRC(col, EXTRAPOLATE(src_y , src_rows));
|
||||||
sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y + 1, last_row) * src_step + col * PIXSIZE));
|
sum = sum + co2 * SRC(col, EXTRAPOLATE(src_y + 1, src_rows));
|
||||||
sum = sum + co3 * convertToFT(loadpix(srcData + idx_row(src_y + 2, last_row) * src_step + col * PIXSIZE));
|
sum = sum + co3 * SRC(col, EXTRAPOLATE(src_y + 2, src_rows));
|
||||||
|
|
||||||
smem[2 + get_local_id(0)] = sum;
|
smem[2 + get_local_id(0)] = sum;
|
||||||
|
|
||||||
if (get_local_id(0) < 2)
|
if (get_local_id(0) < 2)
|
||||||
{
|
{
|
||||||
const int left_x = x - 2;
|
col = EXTRAPOLATE(x - 2, src_cols);
|
||||||
|
|
||||||
col = idx_col(left_x, last_col);
|
sum = co3 * SRC(col, EXTRAPOLATE(src_y - 2, src_rows));
|
||||||
|
sum = sum + co2 * SRC(col, EXTRAPOLATE(src_y - 1, src_rows));
|
||||||
sum = co3 * convertToFT(loadpix(srcData + idx_row(src_y - 2, last_row) * src_step + col * PIXSIZE));
|
sum = sum + co1 * SRC(col, EXTRAPOLATE(src_y , src_rows));
|
||||||
sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y - 1, last_row) * src_step + col * PIXSIZE));
|
sum = sum + co2 * SRC(col, EXTRAPOLATE(src_y + 1, src_rows));
|
||||||
sum = sum + co1 * convertToFT(loadpix(srcData + idx_row(src_y , last_row) * src_step + col * PIXSIZE));
|
sum = sum + co3 * SRC(col, EXTRAPOLATE(src_y + 2, src_rows));
|
||||||
sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y + 1, last_row) * src_step + col * PIXSIZE));
|
|
||||||
sum = sum + co3 * convertToFT(loadpix(srcData + idx_row(src_y + 2, last_row) * src_step + col * PIXSIZE));
|
|
||||||
|
|
||||||
smem[get_local_id(0)] = sum;
|
smem[get_local_id(0)] = sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (get_local_id(0) > 253)
|
if (get_local_id(0) > LOCAL_SIZE - 3)
|
||||||
{
|
{
|
||||||
const int right_x = x + 2;
|
col = EXTRAPOLATE(x + 2, src_cols);
|
||||||
|
|
||||||
col = idx_col(right_x, last_col);
|
sum = co3 * SRC(col, EXTRAPOLATE(src_y - 2, src_rows));
|
||||||
|
sum = sum + co2 * SRC(col, EXTRAPOLATE(src_y - 1, src_rows));
|
||||||
sum = co3 * convertToFT(loadpix(srcData + idx_row(src_y - 2, last_row) * src_step + col * PIXSIZE));
|
sum = sum + co1 * SRC(col, EXTRAPOLATE(src_y , src_rows));
|
||||||
sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y - 1, last_row) * src_step + col * PIXSIZE));
|
sum = sum + co2 * SRC(col, EXTRAPOLATE(src_y + 1, src_rows));
|
||||||
sum = sum + co1 * convertToFT(loadpix(srcData + idx_row(src_y , last_row) * src_step + col * PIXSIZE));
|
sum = sum + co3 * SRC(col, EXTRAPOLATE(src_y + 2, src_rows));
|
||||||
sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y + 1, last_row) * src_step + col * PIXSIZE));
|
|
||||||
sum = sum + co3 * convertToFT(loadpix(srcData + idx_row(src_y + 2, last_row) * src_step + col * PIXSIZE));
|
|
||||||
|
|
||||||
smem[4 + get_local_id(0)] = sum;
|
smem[4 + get_local_id(0)] = sum;
|
||||||
}
|
}
|
||||||
@ -193,7 +175,7 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset,
|
|||||||
|
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
if (get_local_id(0) < 128)
|
if (get_local_id(0) < LOCAL_SIZE / 2)
|
||||||
{
|
{
|
||||||
const int tid2 = get_local_id(0) * 2;
|
const int tid2 = get_local_id(0) * 2;
|
||||||
|
|
||||||
|
@ -407,11 +407,8 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in
|
|||||||
{
|
{
|
||||||
int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type);
|
int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type);
|
||||||
|
|
||||||
if (channels > 4 || borderType != BORDER_DEFAULT)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
|
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
|
||||||
if ((depth == CV_64F) && !(doubleSupport))
|
if (channels > 4 || (depth == CV_64F && !doubleSupport))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
Size ssize = _src.size();
|
Size ssize = _src.size();
|
||||||
@ -425,15 +422,18 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in
|
|||||||
UMat dst = _dst.getUMat();
|
UMat dst = _dst.getUMat();
|
||||||
|
|
||||||
int float_depth = depth == CV_64F ? CV_64F : CV_32F;
|
int float_depth = depth == CV_64F ? CV_64F : CV_32F;
|
||||||
|
const int local_size = 256;
|
||||||
|
const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP",
|
||||||
|
"BORDER_REFLECT_101" };
|
||||||
char cvt[2][50];
|
char cvt[2][50];
|
||||||
String buildOptions = format(
|
String buildOptions = format(
|
||||||
"-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s "
|
"-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s "
|
||||||
"-D T1=%s -D cn=%d",
|
"-D T1=%s -D cn=%d -D %s -D LOCAL_SIZE=%d",
|
||||||
ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
|
ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
|
||||||
ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
|
ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
|
||||||
ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
|
ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
|
||||||
doubleSupport ? " -D DOUBLE_SUPPORT" : "",
|
doubleSupport ? " -D DOUBLE_SUPPORT" : "",
|
||||||
ocl::typeToStr(depth), channels
|
ocl::typeToStr(depth), channels, borderMap[borderType], local_size
|
||||||
);
|
);
|
||||||
ocl::Kernel k("pyrDown", ocl::imgproc::pyr_down_oclsrc, buildOptions);
|
ocl::Kernel k("pyrDown", ocl::imgproc::pyr_down_oclsrc, buildOptions);
|
||||||
if (k.empty())
|
if (k.empty())
|
||||||
@ -441,7 +441,7 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in
|
|||||||
|
|
||||||
k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));
|
k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));
|
||||||
|
|
||||||
size_t localThreads[2] = { 256, 1 };
|
size_t localThreads[2] = { local_size, 1 };
|
||||||
size_t globalThreads[2] = { src.cols, dst.rows };
|
size_t globalThreads[2] = { src.cols, dst.rows };
|
||||||
return k.run(2, globalThreads, localThreads, false);
|
return k.run(2, globalThreads, localThreads, false);
|
||||||
}
|
}
|
||||||
|
@ -52,9 +52,9 @@
|
|||||||
namespace cvtest {
|
namespace cvtest {
|
||||||
namespace ocl {
|
namespace ocl {
|
||||||
|
|
||||||
PARAM_TEST_CASE(PyrTestBase, MatDepth, Channels, bool)
|
PARAM_TEST_CASE(PyrTestBase, MatDepth, Channels, BorderType, bool)
|
||||||
{
|
{
|
||||||
int depth, channels;
|
int depth, channels, borderType;
|
||||||
bool use_roi;
|
bool use_roi;
|
||||||
|
|
||||||
TEST_DECLARE_INPUT_PARAMETER(src);
|
TEST_DECLARE_INPUT_PARAMETER(src);
|
||||||
@ -64,7 +64,8 @@ PARAM_TEST_CASE(PyrTestBase, MatDepth, Channels, bool)
|
|||||||
{
|
{
|
||||||
depth = GET_PARAM(0);
|
depth = GET_PARAM(0);
|
||||||
channels = GET_PARAM(1);
|
channels = GET_PARAM(1);
|
||||||
use_roi = GET_PARAM(2);
|
borderType = GET_PARAM(2);
|
||||||
|
use_roi = GET_PARAM(3);
|
||||||
}
|
}
|
||||||
|
|
||||||
void generateTestData(Size src_roiSize, Size dst_roiSize)
|
void generateTestData(Size src_roiSize, Size dst_roiSize)
|
||||||
@ -99,8 +100,8 @@ OCL_TEST_P(PyrDown, Mat)
|
|||||||
dst_roiSize = dst_roiSize.area() == 0 ? Size((src_roiSize.width + 1) / 2, (src_roiSize.height + 1) / 2) : dst_roiSize;
|
dst_roiSize = dst_roiSize.area() == 0 ? Size((src_roiSize.width + 1) / 2, (src_roiSize.height + 1) / 2) : dst_roiSize;
|
||||||
generateTestData(src_roiSize, dst_roiSize);
|
generateTestData(src_roiSize, dst_roiSize);
|
||||||
|
|
||||||
OCL_OFF(pyrDown(src_roi, dst_roi, dst_roiSize));
|
OCL_OFF(pyrDown(src_roi, dst_roi, dst_roiSize, borderType));
|
||||||
OCL_ON(pyrDown(usrc_roi, udst_roi, dst_roiSize));
|
OCL_ON(pyrDown(usrc_roi, udst_roi, dst_roiSize, borderType));
|
||||||
|
|
||||||
Near(depth == CV_32F ? 1e-4f : 1.0f);
|
Near(depth == CV_32F ? 1e-4f : 1.0f);
|
||||||
}
|
}
|
||||||
@ -109,6 +110,8 @@ OCL_TEST_P(PyrDown, Mat)
|
|||||||
OCL_INSTANTIATE_TEST_CASE_P(ImgprocPyr, PyrDown, Combine(
|
OCL_INSTANTIATE_TEST_CASE_P(ImgprocPyr, PyrDown, Combine(
|
||||||
Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
|
Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
|
||||||
Values(1, 2, 3, 4),
|
Values(1, 2, 3, 4),
|
||||||
|
Values((BorderType)BORDER_REPLICATE,
|
||||||
|
(BorderType)BORDER_REFLECT, (BorderType)BORDER_REFLECT_101),
|
||||||
Bool()
|
Bool()
|
||||||
));
|
));
|
||||||
|
|
||||||
@ -124,8 +127,8 @@ OCL_TEST_P(PyrUp, Mat)
|
|||||||
Size dst_roiSize = Size(2 * src_roiSize.width, 2 * src_roiSize.height);
|
Size dst_roiSize = Size(2 * src_roiSize.width, 2 * src_roiSize.height);
|
||||||
generateTestData(src_roiSize, dst_roiSize);
|
generateTestData(src_roiSize, dst_roiSize);
|
||||||
|
|
||||||
OCL_OFF(pyrUp(src_roi, dst_roi, dst_roiSize));
|
OCL_OFF(pyrUp(src_roi, dst_roi, dst_roiSize, borderType));
|
||||||
OCL_ON(pyrUp(usrc_roi, udst_roi, dst_roiSize));
|
OCL_ON(pyrUp(usrc_roi, udst_roi, dst_roiSize, borderType));
|
||||||
|
|
||||||
Near(depth == CV_32F ? 1e-4f : 1.0f);
|
Near(depth == CV_32F ? 1e-4f : 1.0f);
|
||||||
}
|
}
|
||||||
@ -134,6 +137,7 @@ OCL_TEST_P(PyrUp, Mat)
|
|||||||
OCL_INSTANTIATE_TEST_CASE_P(ImgprocPyr, PyrUp, Combine(
|
OCL_INSTANTIATE_TEST_CASE_P(ImgprocPyr, PyrUp, Combine(
|
||||||
Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
|
Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
|
||||||
Values(1, 2, 3, 4),
|
Values(1, 2, 3, 4),
|
||||||
|
Values((BorderType)BORDER_REFLECT_101),
|
||||||
Bool()
|
Bool()
|
||||||
));
|
));
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user