Change local size
This commit is contained in:
parent
730ead44fe
commit
1a73aa1f6a
@ -3471,7 +3471,8 @@ static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY
|
|||||||
return k.run(2, globalsize, localsize, false);
|
return k.run(2, globalsize, localsize, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
const int optimizedSepFilterLocalSize = 16;
|
const int optimizedSepFilterLocalWidth = 16;
|
||||||
|
const int optimizedSepFilterLocalHeight = 8;
|
||||||
|
|
||||||
static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
|
static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
|
||||||
Mat row_kernel, Mat col_kernel,
|
Mat row_kernel, Mat col_kernel,
|
||||||
@ -3491,8 +3492,8 @@ static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
|
|||||||
borderType == BORDER_REFLECT_101))
|
borderType == BORDER_REFLECT_101))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
size_t lt2[2] = { optimizedSepFilterLocalSize, optimizedSepFilterLocalSize };
|
size_t lt2[2] = { optimizedSepFilterLocalWidth, optimizedSepFilterLocalHeight };
|
||||||
size_t gt2[2] = { lt2[0] * (1 + (size.width - 1) / lt2[0]), optimizedSepFilterLocalSize};
|
size_t gt2[2] = { lt2[0] * (1 + (size.width - 1) / lt2[0]), lt2[1]};
|
||||||
|
|
||||||
char cvt[2][40];
|
char cvt[2][40];
|
||||||
const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP",
|
const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP",
|
||||||
@ -3584,8 +3585,8 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
|
|||||||
}
|
}
|
||||||
|
|
||||||
CV_OCL_RUN_(kernelY.cols <= 21 && kernelX.cols <= 21 &&
|
CV_OCL_RUN_(kernelY.cols <= 21 && kernelX.cols <= 21 &&
|
||||||
imgSize.width > optimizedSepFilterLocalSize + anchor.x &&
|
imgSize.width > optimizedSepFilterLocalWidth + anchor.x &&
|
||||||
imgSize.height > optimizedSepFilterLocalSize + anchor.y &&
|
imgSize.height > optimizedSepFilterLocalHeight + anchor.y &&
|
||||||
(!(borderType & BORDER_ISOLATED) || _src.offset() == 0) &&
|
(!(borderType & BORDER_ISOLATED) || _src.offset() == 0) &&
|
||||||
anchor == Point(kernelX.cols >> 1, kernelY.cols >> 1) &&
|
anchor == Point(kernelX.cols >> 1, kernelY.cols >> 1) &&
|
||||||
(d.isIntel() || (d.isAMD() && !d.hostUnifiedMemory())),
|
(d.isIntel() || (d.isAMD() && !d.hostUnifiedMemory())),
|
||||||
|
@ -127,10 +127,9 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int
|
|||||||
// and read my own source pixel into local memory
|
// and read my own source pixel into local memory
|
||||||
// with account for extra border pixels, which will be read by starting workitems
|
// with account for extra border pixels, which will be read by starting workitems
|
||||||
int clocY = liy;
|
int clocY = liy;
|
||||||
int cSrcY = liy + srcOffsetY - RADIUSY;
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
int yb = cSrcY;
|
int yb = clocY + srcOffsetY - RADIUSY;
|
||||||
EXTRAPOLATE(yb, (height));
|
EXTRAPOLATE(yb, (height));
|
||||||
|
|
||||||
int clocX = lix;
|
int clocX = lix;
|
||||||
@ -147,7 +146,6 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int
|
|||||||
while(clocX < BLK_X+(RADIUSX*2));
|
while(clocX < BLK_X+(RADIUSX*2));
|
||||||
|
|
||||||
clocY += BLK_Y;
|
clocY += BLK_Y;
|
||||||
cSrcY += BLK_Y;
|
|
||||||
}
|
}
|
||||||
while (clocY < BLK_Y+(RADIUSY*2));
|
while (clocY < BLK_Y+(RADIUSY*2));
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
@ -206,8 +204,8 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int
|
|||||||
}
|
}
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
int cSrcY = y + BLK_Y + liy + srcOffsetY + RADIUSY;
|
int yb = y + liy + BLK_Y + srcOffsetY + RADIUSY;
|
||||||
EXTRAPOLATE(cSrcY, (height));
|
EXTRAPOLATE(yb, (height));
|
||||||
|
|
||||||
clocX = lix;
|
clocX = lix;
|
||||||
int cSrcX = x + srcOffsetX - RADIUSX;
|
int cSrcX = x + srcOffsetX - RADIUSX;
|
||||||
@ -215,7 +213,7 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int
|
|||||||
{
|
{
|
||||||
int xb = cSrcX;
|
int xb = cSrcX;
|
||||||
EXTRAPOLATE(xb,(width));
|
EXTRAPOLATE(xb,(width));
|
||||||
lsmem[liy + 2*RADIUSY][clocX] = ELEM(xb, cSrcY, (width), (height), 0 );
|
lsmem[liy + 2*RADIUSY][clocX] = ELEM(xb, yb, (width), (height), 0 );
|
||||||
|
|
||||||
clocX += BLK_X;
|
clocX += BLK_X;
|
||||||
cSrcX += BLK_X;
|
cSrcX += BLK_X;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user