Merge pull request #3394 from akarsakov:ocl_canny
This commit is contained in:
commit
569a95e9e1
@ -138,10 +138,10 @@ static bool ocl_Canny(InputArray _src, OutputArray _dst, float low_thresh, float
|
||||
*/
|
||||
char cvt[40];
|
||||
ocl::Kernel with_sobel("stage1_with_sobel", ocl::imgproc::canny_oclsrc,
|
||||
format("-D WITH_SOBEL -D cn=%d -D TYPE=%s -D convert_intN=%s -D intN=%s -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s",
|
||||
format("-D WITH_SOBEL -D cn=%d -D TYPE=%s -D convert_floatN=%s -D floatN=%s -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s",
|
||||
cn, ocl::memopTypeToStr(_src.depth()),
|
||||
ocl::convertTypeStr(_src.type(), CV_32SC(cn), cn, cvt),
|
||||
ocl::memopTypeToStr(CV_32SC(cn)),
|
||||
ocl::convertTypeStr(_src.depth(), CV_32F, cn, cvt),
|
||||
ocl::typeToStr(CV_MAKE_TYPE(CV_32F, cn)),
|
||||
lSizeX, lSizeY,
|
||||
L2gradient ? " -D L2GRAD" : ""));
|
||||
if (with_sobel.empty())
|
||||
@ -151,7 +151,7 @@ static bool ocl_Canny(InputArray _src, OutputArray _dst, float low_thresh, float
|
||||
map.create(size, CV_32S);
|
||||
with_sobel.args(ocl::KernelArg::ReadOnly(src),
|
||||
ocl::KernelArg::WriteOnlyNoSize(map),
|
||||
low, high);
|
||||
(float) low, (float) high);
|
||||
|
||||
size_t globalsize[2] = { size.width, size.height },
|
||||
localsize[2] = { lSizeX, lSizeY };
|
||||
|
@ -49,9 +49,9 @@
|
||||
#ifdef WITH_SOBEL
|
||||
|
||||
#if cn == 1
|
||||
#define loadpix(addr) convert_intN(*(__global const TYPE *)(addr))
|
||||
#define loadpix(addr) convert_floatN(*(__global const TYPE *)(addr))
|
||||
#else
|
||||
#define loadpix(addr) convert_intN(vload3(0, (__global const TYPE *)(addr)))
|
||||
#define loadpix(addr) convert_floatN(vload3(0, (__global const TYPE *)(addr)))
|
||||
#endif
|
||||
#define storepix(value, addr) *(__global int *)(addr) = (int)(value)
|
||||
|
||||
@ -77,23 +77,21 @@ __constant int next[4][2] = {
|
||||
{ 1, 1 }
|
||||
};
|
||||
|
||||
inline int3 sobel(int idx, __local const intN *smem)
|
||||
inline float3 sobel(int idx, __local const floatN *smem)
|
||||
{
|
||||
// result: x, y, mag
|
||||
int3 res;
|
||||
float3 res;
|
||||
|
||||
intN dx = smem[idx + 2] - smem[idx]
|
||||
+ 2 * (smem[idx + GRP_SIZEX + 6] - smem[idx + GRP_SIZEX + 4])
|
||||
+ smem[idx + 2 * GRP_SIZEX + 10] - smem[idx + 2 * GRP_SIZEX + 8];
|
||||
floatN dx = fma(2, smem[idx + GRP_SIZEX + 6] - smem[idx + GRP_SIZEX + 4],
|
||||
smem[idx + 2] - smem[idx] + smem[idx + 2 * GRP_SIZEX + 10] - smem[idx + 2 * GRP_SIZEX + 8]);
|
||||
|
||||
intN dy = smem[idx] - smem[idx + 2 * GRP_SIZEX + 8]
|
||||
+ 2 * (smem[idx + 1] - smem[idx + 2 * GRP_SIZEX + 9])
|
||||
+ smem[idx + 2] - smem[idx + 2 * GRP_SIZEX + 10];
|
||||
floatN dy = fma(2, smem[idx + 1] - smem[idx + 2 * GRP_SIZEX + 9],
|
||||
smem[idx + 2] - smem[idx + 2 * GRP_SIZEX + 10] + smem[idx] - smem[idx + 2 * GRP_SIZEX + 8]);
|
||||
|
||||
#ifdef L2GRAD
|
||||
intN magN = dx * dx + dy * dy;
|
||||
floatN magN = fma(dx, dx, dy * dy);
|
||||
#else
|
||||
intN magN = convert_intN(abs(dx) + abs(dy));
|
||||
floatN magN = fabs(dx) + fabs(dy);
|
||||
#endif
|
||||
#if cn == 1
|
||||
res.z = magN;
|
||||
@ -120,9 +118,9 @@ inline int3 sobel(int idx, __local const intN *smem)
|
||||
|
||||
__kernel void stage1_with_sobel(__global const uchar *src, int src_step, int src_offset, int rows, int cols,
|
||||
__global uchar *map, int map_step, int map_offset,
|
||||
int low_thr, int high_thr)
|
||||
float low_thr, float high_thr)
|
||||
{
|
||||
__local intN smem[(GRP_SIZEX + 4) * (GRP_SIZEY + 4)];
|
||||
__local floatN smem[(GRP_SIZEX + 4) * (GRP_SIZEY + 4)];
|
||||
|
||||
int lidx = get_local_id(0);
|
||||
int lidy = get_local_id(1);
|
||||
@ -143,7 +141,7 @@ __kernel void stage1_with_sobel(__global const uchar *src, int src_step, int src
|
||||
//// Sobel, Magnitude
|
||||
//
|
||||
|
||||
__local int mag[(GRP_SIZEX + 2) * (GRP_SIZEY + 2)];
|
||||
__local float mag[(GRP_SIZEX + 2) * (GRP_SIZEY + 2)];
|
||||
|
||||
lidx++;
|
||||
lidy++;
|
||||
@ -164,13 +162,13 @@ __kernel void stage1_with_sobel(__global const uchar *src, int src_step, int src
|
||||
int idx = lidx + lidy * (GRP_SIZEX + 4);
|
||||
i = lidx + lidy * (GRP_SIZEX + 2);
|
||||
|
||||
int3 res = sobel(idx, smem);
|
||||
float3 res = sobel(idx, smem);
|
||||
mag[i] = res.z;
|
||||
int x = res.x;
|
||||
int y = res.y;
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
int x = (int) res.x;
|
||||
int y = (int) res.y;
|
||||
|
||||
//// Threshold + Non maxima suppression
|
||||
//
|
||||
|
||||
@ -218,7 +216,7 @@ __kernel void stage1_with_sobel(__global const uchar *src, int src_step, int src
|
||||
if (gidx >= cols || gidy >= rows)
|
||||
return;
|
||||
|
||||
int mag0 = mag[i];
|
||||
float mag0 = mag[i];
|
||||
|
||||
int value = 1;
|
||||
if (mag0 > low_thr)
|
||||
@ -235,8 +233,8 @@ __kernel void stage1_with_sobel(__global const uchar *src, int src_step, int src
|
||||
|
||||
int dir3 = (a * b) & (((x ^ y) & 0x80000000) >> 31); // if a = 1, b = 1, dy ^ dx < 0
|
||||
int dir = a * b + 2 * dir3;
|
||||
int prev_mag = mag[(lidy + prev[dir][0]) * (GRP_SIZEX + 2) + lidx + prev[dir][1]];
|
||||
int next_mag = mag[(lidy + next[dir][0]) * (GRP_SIZEX + 2) + lidx + next[dir][1]] + (dir & 1);
|
||||
float prev_mag = mag[(lidy + prev[dir][0]) * (GRP_SIZEX + 2) + lidx + prev[dir][1]];
|
||||
float next_mag = mag[(lidy + next[dir][0]) * (GRP_SIZEX + 2) + lidx + next[dir][1]] + (dir & 1);
|
||||
|
||||
if (mag0 > prev_mag && mag0 >= next_mag)
|
||||
{
|
||||
@ -384,12 +382,12 @@ __constant short move_dir[2][8] = {
|
||||
{ -1, 0, 1, -1, 1, -1, 0, 1 }
|
||||
};
|
||||
|
||||
__kernel void stage2_hysteresis(__global uchar *map, int map_step, int map_offset, int rows, int cols)
|
||||
__kernel void stage2_hysteresis(__global uchar *map_ptr, int map_step, int map_offset, int rows, int cols)
|
||||
{
|
||||
map += map_offset;
|
||||
map_ptr += map_offset;
|
||||
|
||||
int x = get_global_id(0);
|
||||
int y0 = get_global_id(1) * PIX_PER_WI;
|
||||
int y = get_global_id(1) * PIX_PER_WI;
|
||||
|
||||
int lid = get_local_id(0) + get_local_id(1) * LOCAL_X;
|
||||
|
||||
@ -400,15 +398,23 @@ __kernel void stage2_hysteresis(__global uchar *map, int map_step, int map_offse
|
||||
l_counter = 0;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
#pragma unroll
|
||||
for (int y = y0; y < min(y0 + PIX_PER_WI, rows); ++y)
|
||||
if (x < cols)
|
||||
{
|
||||
if (x < cols)
|
||||
__global uchar* map = map_ptr + mad24(y, map_step, x * (int)sizeof(int));
|
||||
|
||||
#pragma unroll
|
||||
for (int cy = 0; cy < PIX_PER_WI; ++cy)
|
||||
{
|
||||
int type = loadpix(map + mad24(y, map_step, x * (int)sizeof(int)));
|
||||
if (type == 2)
|
||||
if (y < rows)
|
||||
{
|
||||
l_stack[atomic_inc(&l_counter)] = (ushort2)(x, y);
|
||||
int type = loadpix(map);
|
||||
if (type == 2)
|
||||
{
|
||||
l_stack[atomic_inc(&l_counter)] = (ushort2)(x, y);
|
||||
}
|
||||
|
||||
y++;
|
||||
map += map_step;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -422,7 +428,6 @@ __kernel void stage2_hysteresis(__global uchar *map, int map_step, int map_offse
|
||||
int mod = l_counter % LOCAL_TOTAL;
|
||||
int pix_per_thr = l_counter / LOCAL_TOTAL + ((lid < mod) ? 1 : 0);
|
||||
|
||||
#pragma unroll
|
||||
for (int i = 0; i < pix_per_thr; ++i)
|
||||
{
|
||||
ushort2 pos = l_stack[ atomic_dec(&l_counter) - 1 ];
|
||||
@ -434,7 +439,7 @@ __kernel void stage2_hysteresis(__global uchar *map, int map_step, int map_offse
|
||||
ushort posy = pos.y + move_dir[1][j];
|
||||
if (posx < 0 || posy < 0 || posx >= cols || posy >= rows)
|
||||
continue;
|
||||
__global uchar *addr = map + mad24(posy, map_step, posx * (int)sizeof(int));
|
||||
__global uchar *addr = map_ptr + mad24(posy, map_step, posx * (int)sizeof(int));
|
||||
int type = loadpix(addr);
|
||||
if (type == 0)
|
||||
{
|
||||
@ -463,16 +468,26 @@ __kernel void getEdges(__global const uchar *mapptr, int map_step, int map_offse
|
||||
__global uchar *dst, int dst_step, int dst_offset)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y0 = get_global_id(1) * PIX_PER_WI;
|
||||
int y = get_global_id(1) * PIX_PER_WI;
|
||||
|
||||
#pragma unroll
|
||||
for (int y = y0; y < min(y0 + PIX_PER_WI, rows); ++y)
|
||||
if (x < cols)
|
||||
{
|
||||
int map_index = mad24(map_step, y, mad24(x, (int)sizeof(int), map_offset));
|
||||
int dst_index = mad24(dst_step, y, x) + dst_offset;
|
||||
int dst_index = mad24(dst_step, y, x + dst_offset);
|
||||
|
||||
__global const int * map = (__global const int *)(mapptr + map_index);
|
||||
dst[dst_index] = (uchar)(-(map[0] >> 1));
|
||||
#pragma unroll
|
||||
for (int cy = 0; cy < PIX_PER_WI; ++cy)
|
||||
{
|
||||
if (y < rows)
|
||||
{
|
||||
__global const int * map = (__global const int *)(mapptr + map_index);
|
||||
dst[dst_index] = (uchar)(-(map[0] >> 1));
|
||||
|
||||
y++;
|
||||
map_index += map_step;
|
||||
dst_index += dst_step;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user