Fix problems with border extrapolation in kernel. Add Isolated/Nonisolated borders.

This commit is contained in:
Vladimir Bystricky 2013-12-11 16:57:47 +04:00
parent 44126e350a
commit 268d814d18
3 changed files with 48 additions and 26 deletions

View File

@ -105,7 +105,7 @@
do \ do \
{ \ { \
if (x < minX) \ if (x < minX) \
x = -(x - minX) - 1 + delta; \ x = minX - (x - minX) - 1 + delta; \
else \ else \
x = maxX - 1 - (x - maxX) - delta; \ x = maxX - 1 - (x - maxX) - delta; \
} \ } \
@ -117,7 +117,7 @@
do \ do \
{ \ { \
if (y < minY) \ if (y < minY) \
y = -(y - minY) - 1 + delta; \ y = minY - (y - minY) - 1 + delta; \
else \ else \
y = maxY - 1 - (y - maxY) - delta; \ y = maxY - 1 - (y - maxY) - delta; \
} \ } \
@ -227,7 +227,7 @@ struct RectCoords
#endif #endif
inline INTERMEDIATE_TYPE readSrcPixel(int2 pos, __global const uchar* srcptr, int srcstep, int srcoffset, const struct RectCoords srcCoords inline INTERMEDIATE_TYPE readSrcPixel(int2 pos, __global const uchar* srcptr, int srcstep, const struct RectCoords srcCoords
#ifdef BORDER_CONSTANT #ifdef BORDER_CONSTANT
, SCALAR_TYPE borderValue , SCALAR_TYPE borderValue
#endif #endif
@ -239,7 +239,7 @@ inline INTERMEDIATE_TYPE readSrcPixel(int2 pos, __global const uchar* srcptr, in
if(pos.x >= 0 && pos.y >= 0 && pos.x < srcCoords.x2 && pos.y < srcCoords.y2) if(pos.x >= 0 && pos.y >= 0 && pos.x < srcCoords.x2 && pos.y < srcCoords.y2)
#endif #endif
{ {
__global TYPE* ptr = (__global TYPE*)(srcptr + pos.y * srcstep + srcoffset + pos.x * TYPE_SIZE/*sizeof(TYPE)*/); __global TYPE* ptr = (__global TYPE*)(srcptr + pos.y * srcstep + pos.x * sizeof(TYPE));
return CONVERT_TO_FPTYPE(*ptr); return CONVERT_TO_FPTYPE(*ptr);
} }
else else
@ -265,7 +265,7 @@ inline INTERMEDIATE_TYPE readSrcPixel(int2 pos, __global const uchar* srcptr, in
pos = (int2)(selected_col, selected_row); pos = (int2)(selected_col, selected_row);
if(pos.x >= 0 && pos.y >= 0 && pos.x < srcCoords.x2 && pos.y < srcCoords.y2) if(pos.x >= 0 && pos.y >= 0 && pos.x < srcCoords.x2 && pos.y < srcCoords.y2)
{ {
__global TYPE* ptr = (__global TYPE*)(srcptr + pos.y * srcstep + srcoffset + pos.x * TYPE_SIZE/*sizeof(TYPE)*/); __global TYPE* ptr = (__global TYPE*)(srcptr + pos.y * srcstep + pos.x * sizeof(TYPE));
return CONVERT_TO_FPTYPE(*ptr); return CONVERT_TO_FPTYPE(*ptr);
} }
else else
@ -282,8 +282,8 @@ inline INTERMEDIATE_TYPE readSrcPixel(int2 pos, __global const uchar* srcptr, in
__kernel __kernel
__attribute__((reqd_work_group_size(LOCAL_SIZE, 1, 1))) __attribute__((reqd_work_group_size(LOCAL_SIZE, 1, 1)))
void boxFilter(__global const uchar* srcptr, int srcstep, int srcoffset, void boxFilter(__global const uchar* srcptr, int srcstep, int srcOffsetX, int srcOffsetY, int srcEndX, int srcEndY,
__global uchar* dstptr, int dststep, int dstoffset, __global uchar* dstptr, int dststep, int dstoffset,
int rows, int cols, int rows, int cols,
#ifdef BORDER_CONSTANT #ifdef BORDER_CONSTANT
SCALAR_TYPE borderValue, SCALAR_TYPE borderValue,
@ -291,8 +291,7 @@ void boxFilter(__global const uchar* srcptr, int srcstep, int srcoffset,
FPTYPE alpha FPTYPE alpha
) )
{ {
const struct RectCoords srcCoords = {0, 0, cols, rows}; // for non-isolated border: offsetX, offsetY, wholeX, wholeY const struct RectCoords srcCoords = {srcOffsetX, srcOffsetY, srcEndX, srcEndY}; // for non-isolated border: offsetX, offsetY, wholeX, wholeY
const struct RectCoords dstCoords = {0, 0, cols, rows};
const int x = get_local_id(0) + (LOCAL_SIZE - (KERNEL_SIZE_X - 1)) * get_group_id(0) - ANCHOR_X; const int x = get_local_id(0) + (LOCAL_SIZE - (KERNEL_SIZE_X - 1)) * get_group_id(0) - ANCHOR_X;
const int y = get_global_id(1) * BLOCK_SIZE_Y; const int y = get_global_id(1) * BLOCK_SIZE_Y;
@ -305,7 +304,7 @@ void boxFilter(__global const uchar* srcptr, int srcstep, int srcoffset,
int2 srcPos = (int2)(srcCoords.x1 + x, srcCoords.y1 + y - ANCHOR_Y); int2 srcPos = (int2)(srcCoords.x1 + x, srcCoords.y1 + y - ANCHOR_Y);
for(int sy = 0; sy < KERNEL_SIZE_Y; sy++, srcPos.y++) for(int sy = 0; sy < KERNEL_SIZE_Y; sy++, srcPos.y++)
{ {
data[sy] = readSrcPixel(srcPos, srcptr, srcstep, srcoffset, srcCoords data[sy] = readSrcPixel(srcPos, srcptr, srcstep, srcCoords
#ifdef BORDER_CONSTANT #ifdef BORDER_CONSTANT
, borderValue , borderValue
#endif #endif
@ -321,20 +320,20 @@ void boxFilter(__global const uchar* srcptr, int srcstep, int srcoffset,
sumOfCols[local_id] = tmp_sum; sumOfCols[local_id] = tmp_sum;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
int2 pos = (int2)(dstCoords.x1 + x, dstCoords.y1 + y); int2 pos = (int2)(x, y);
__global TYPE* dstPtr = (__global TYPE*)(dstptr + pos.y * dststep + dstoffset + pos.x * TYPE_SIZE/*sizeof(TYPE)*/); // Pointer can be out of bounds! __global TYPE* dstPtr = (__global TYPE*)(dstptr + pos.y * dststep + dstoffset + pos.x * TYPE_SIZE/*sizeof(TYPE)*/); // Pointer can be out of bounds!
int sy_index = 0; // current index in data[] array int sy_index = 0; // current index in data[] array
int stepsY = min(dstCoords.y2 - pos.y, BLOCK_SIZE_Y); int stepsY = min(rows - pos.y, BLOCK_SIZE_Y);
ASSERT(stepsY > 0); ASSERT(stepsY > 0);
for (; ;) for (; ;)
{ {
ASSERT(pos.y < dstCoords.y2); ASSERT(pos.y < rows);
if(local_id >= ANCHOR_X && local_id < LOCAL_SIZE - (KERNEL_SIZE_X - 1 - ANCHOR_X) && if(local_id >= ANCHOR_X && local_id < LOCAL_SIZE - (KERNEL_SIZE_X - 1 - ANCHOR_X) &&
pos.x >= dstCoords.x1 && pos.x < dstCoords.x2) pos.x >= 0 && pos.x < cols)
{ {
ASSERT(pos.y >= dstCoords.y1 && pos.y < dstCoords.y2); ASSERT(pos.y >= 0 && pos.y < rows);
INTERMEDIATE_TYPE total_sum = 0; INTERMEDIATE_TYPE total_sum = 0;
#pragma unroll #pragma unroll
@ -357,7 +356,7 @@ void boxFilter(__global const uchar* srcptr, int srcstep, int srcoffset,
// only works with scalars: ASSERT(fabs(tmp_sum - sumOfCols[local_id]) < (INTERMEDIATE_TYPE)1e-6); // only works with scalars: ASSERT(fabs(tmp_sum - sumOfCols[local_id]) < (INTERMEDIATE_TYPE)1e-6);
tmp_sum -= data[sy_index]; tmp_sum -= data[sy_index];
data[sy_index] = readSrcPixel(srcPos, srcptr, srcstep, srcoffset, srcCoords data[sy_index] = readSrcPixel(srcPos, srcptr, srcstep, srcCoords
#ifdef BORDER_CONSTANT #ifdef BORDER_CONSTANT
, borderValue , borderValue
#endif #endif

View File

@ -667,6 +667,14 @@ static bool ocl_boxFilter( InputArray _src, OutputArray _dst, int ddepth,
size_t globalsize[2] = {sz.width, sz.height}; size_t globalsize[2] = {sz.width, sz.height};
size_t localsize[2] = {0, 1}; size_t localsize[2] = {0, 1};
UMat src; Size wholeSize;
if (!isIsolatedBorder)
{
src = _src.getUMat();
Point ofs;
src.locateROI(wholeSize, ofs);
}
size_t maxWorkItemSizes[32]; device.maxWorkItemSizes(maxWorkItemSizes); size_t maxWorkItemSizes[32]; device.maxWorkItemSizes(maxWorkItemSizes);
size_t tryWorkItems = maxWorkItemSizes[0]; size_t tryWorkItems = maxWorkItemSizes[0];
for (;;) for (;;)
@ -685,8 +693,9 @@ static bool ocl_boxFilter( InputArray _src, OutputArray _dst, int ddepth,
int requiredLeft = (int)BLOCK_SIZE; // not this: anchor.x; int requiredLeft = (int)BLOCK_SIZE; // not this: anchor.x;
int requiredBottom = ksize.height - 1 - anchor.y; int requiredBottom = ksize.height - 1 - anchor.y;
int requiredRight = (int)BLOCK_SIZE; // not this: ksize.width - 1 - anchor.x; int requiredRight = (int)BLOCK_SIZE; // not this: ksize.width - 1 - anchor.x;
int h = sz.height; int h = isIsolatedBorder ? sz.height : wholeSize.height;
int w = sz.width; int w = isIsolatedBorder ? sz.width : wholeSize.width;
bool extra_extrapolation = h < requiredTop || h < requiredBottom || w < requiredLeft || w < requiredRight; bool extra_extrapolation = h < requiredTop || h < requiredBottom || w < requiredLeft || w < requiredRight;
if ((w < ksize.width) || (h < ksize.height)) if ((w < ksize.width) || (h < ksize.height))
@ -719,10 +728,19 @@ static bool ocl_boxFilter( InputArray _src, OutputArray _dst, int ddepth,
_dst.create(sz, CV_MAKETYPE(ddepth, cn)); _dst.create(sz, CV_MAKETYPE(ddepth, cn));
UMat dst = _dst.getUMat(); UMat dst = _dst.getUMat();
UMat src = _src.getUMat(); if (src.empty())
src = _src.getUMat();
int idxArg = 0; int idxArg = 0;
idxArg = kernel.set(idxArg, ocl::KernelArg::ReadOnlyNoSize(src)); idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(src));
idxArg = kernel.set(idxArg, (int)src.step);
int srcOffsetX = (int)((src.offset % src.step) / src.elemSize());
int srcOffsetY = (int)(src.offset / src.step);
int srcEndX = (isIsolatedBorder ? (srcOffsetX + sz.width) : wholeSize.width);
int srcEndY = (isIsolatedBorder ? (srcOffsetY + sz.height) : wholeSize.height);
idxArg = kernel.set(idxArg, srcOffsetX);
idxArg = kernel.set(idxArg, srcOffsetY);
idxArg = kernel.set(idxArg, srcEndX);
idxArg = kernel.set(idxArg, srcEndY);
idxArg = kernel.set(idxArg, ocl::KernelArg::WriteOnly(dst)); idxArg = kernel.set(idxArg, ocl::KernelArg::WriteOnly(dst));
float borderValue[4] = {0, 0, 0, 0}; float borderValue[4] = {0, 0, 0, 0};
double borderValueDouble[4] = {0, 0, 0, 0}; double borderValueDouble[4] = {0, 0, 0, 0};

View File

@ -58,6 +58,9 @@ enum
// boxFilter // boxFilter
PARAM_TEST_CASE(BoxFilter, MatDepth, Channels, BorderType, bool) PARAM_TEST_CASE(BoxFilter, MatDepth, Channels, BorderType, bool)
{ {
static const int kernelMinSize = 2;
static const int kernelMaxSize = 10;
int type; int type;
Size ksize; Size ksize;
Size dsize; Size dsize;
@ -71,7 +74,7 @@ PARAM_TEST_CASE(BoxFilter, MatDepth, Channels, BorderType, bool)
virtual void SetUp() virtual void SetUp()
{ {
type = CV_MAKE_TYPE(GET_PARAM(0), GET_PARAM(1)); type = CV_MAKE_TYPE(GET_PARAM(0), GET_PARAM(1));
borderType = GET_PARAM(2); borderType = GET_PARAM(2); // only not isolated border tested, because CPU module doesn't support isolated border case.
useRoi = GET_PARAM(3); useRoi = GET_PARAM(3);
} }
@ -79,16 +82,17 @@ PARAM_TEST_CASE(BoxFilter, MatDepth, Channels, BorderType, bool)
{ {
dsize = randomSize(1, MAX_VALUE); dsize = randomSize(1, MAX_VALUE);
ksize = randomSize(1, dsize.width, 1, dsize.height); ksize = randomSize(kernelMinSize, kernelMaxSize);
Size roiSize = randomSize(1, MAX_VALUE); Size roiSize = randomSize(ksize.width, MAX_VALUE, ksize.height, MAX_VALUE);
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(src, src_roi, roiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE); randomSubMat(src, src_roi, roiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);
Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(dst, dst_roi, dsize, dstBorder, type, -MAX_VALUE, MAX_VALUE); randomSubMat(dst, dst_roi, dsize, dstBorder, type, -MAX_VALUE, MAX_VALUE);
anchor.x = anchor.y = -1; anchor.x = randomInt(-1, ksize.width);
anchor.y = randomInt(-1, ksize.height);
UMAT_UPLOAD_INPUT_PARAMETER(src) UMAT_UPLOAD_INPUT_PARAMETER(src)
UMAT_UPLOAD_OUTPUT_PARAMETER(dst) UMAT_UPLOAD_OUTPUT_PARAMETER(dst)
@ -123,7 +127,8 @@ OCL_INSTANTIATE_TEST_CASE_P(ImageProc, BoxFilter,
(BorderType)BORDER_REPLICATE, (BorderType)BORDER_REPLICATE,
(BorderType)BORDER_REFLECT, (BorderType)BORDER_REFLECT,
(BorderType)BORDER_REFLECT_101), (BorderType)BORDER_REFLECT_101),
Bool()) Bool() // ROI
)
); );