Merge pull request #1701 from alalek:ocl_fix_canny
This commit is contained in:
@@ -987,12 +987,12 @@ namespace cv
|
|||||||
|
|
||||||
struct CV_EXPORTS CannyBuf
|
struct CV_EXPORTS CannyBuf
|
||||||
{
|
{
|
||||||
CannyBuf() : counter(NULL) {}
|
CannyBuf() : counter(1, 1, CV_32S) { }
|
||||||
~CannyBuf()
|
~CannyBuf()
|
||||||
{
|
{
|
||||||
release();
|
release();
|
||||||
}
|
}
|
||||||
explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(NULL)
|
explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(1, 1, CV_32S)
|
||||||
{
|
{
|
||||||
create(image_size, apperture_size);
|
create(image_size, apperture_size);
|
||||||
}
|
}
|
||||||
@@ -1004,7 +1004,7 @@ namespace cv
|
|||||||
oclMat dx_buf, dy_buf;
|
oclMat dx_buf, dy_buf;
|
||||||
oclMat edgeBuf;
|
oclMat edgeBuf;
|
||||||
oclMat trackBuf1, trackBuf2;
|
oclMat trackBuf1, trackBuf2;
|
||||||
void *counter;
|
oclMat counter;
|
||||||
Ptr<FilterEngine_GPU> filterDX, filterDY;
|
Ptr<FilterEngine_GPU> filterDX, filterDY;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -49,7 +49,7 @@
|
|||||||
using namespace cv;
|
using namespace cv;
|
||||||
using namespace cv::ocl;
|
using namespace cv::ocl;
|
||||||
|
|
||||||
cv::ocl::CannyBuf::CannyBuf(const oclMat &dx_, const oclMat &dy_) : dx(dx_), dy(dy_), counter(NULL)
|
cv::ocl::CannyBuf::CannyBuf(const oclMat &dx_, const oclMat &dy_) : dx(dx_), dy(dy_), counter(1, 1, CV_32SC1)
|
||||||
{
|
{
|
||||||
CV_Assert(dx_.type() == CV_32SC1 && dy_.type() == CV_32SC1 && dx_.size() == dy_.size());
|
CV_Assert(dx_.type() == CV_32SC1 && dy_.type() == CV_32SC1 && dx_.size() == dy_.size());
|
||||||
|
|
||||||
@@ -80,17 +80,8 @@ void cv::ocl::CannyBuf::create(const Size &image_size, int apperture_size)
|
|||||||
}
|
}
|
||||||
ensureSizeIsEnough(2 * (image_size.height + 2), image_size.width + 2, CV_32FC1, edgeBuf);
|
ensureSizeIsEnough(2 * (image_size.height + 2), image_size.width + 2, CV_32FC1, edgeBuf);
|
||||||
|
|
||||||
ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf1);
|
ensureSizeIsEnough(1, image_size.area(), CV_16UC2, trackBuf1);
|
||||||
ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf2);
|
ensureSizeIsEnough(1, image_size.area(), CV_16UC2, trackBuf2);
|
||||||
|
|
||||||
int counter_i [1] = { 0 };
|
|
||||||
int err = 0;
|
|
||||||
if(counter)
|
|
||||||
{
|
|
||||||
openCLFree(counter);
|
|
||||||
}
|
|
||||||
counter = clCreateBuffer( *((cl_context*)getClContextPtr()), CL_MEM_COPY_HOST_PTR, sizeof(int), counter_i, &err );
|
|
||||||
openCLSafeCall(err);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::CannyBuf::release()
|
void cv::ocl::CannyBuf::release()
|
||||||
@@ -102,7 +93,6 @@ void cv::ocl::CannyBuf::release()
|
|||||||
edgeBuf.release();
|
edgeBuf.release();
|
||||||
trackBuf1.release();
|
trackBuf1.release();
|
||||||
trackBuf2.release();
|
trackBuf2.release();
|
||||||
openCLFree(counter);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace cv
|
namespace cv
|
||||||
@@ -118,9 +108,9 @@ namespace cv
|
|||||||
|
|
||||||
void calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int rows, int cols, float low_thresh, float high_thresh);
|
void calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int rows, int cols, float low_thresh, float high_thresh);
|
||||||
|
|
||||||
void edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, int rows, int cols);
|
void edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, oclMat& counter, int rows, int cols);
|
||||||
|
|
||||||
void edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols);
|
void edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, oclMat& counter, int rows, int cols);
|
||||||
|
|
||||||
void getEdges_gpu(oclMat &map, oclMat &dst, int rows, int cols);
|
void getEdges_gpu(oclMat &map, oclMat &dst, int rows, int cols);
|
||||||
}
|
}
|
||||||
@@ -322,54 +312,61 @@ void canny::calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int ro
|
|||||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, int rows, int cols)
|
void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, oclMat& counter, int rows, int cols)
|
||||||
{
|
{
|
||||||
Context *clCxt = map.clCxt;
|
Context *clCxt = map.clCxt;
|
||||||
string kernelName = "edgesHysteresisLocal";
|
|
||||||
vector< pair<size_t, const void *> > args;
|
vector< pair<size_t, const void *> > args;
|
||||||
|
|
||||||
|
Mat counterMat(counter.rows, counter.cols, counter.type());
|
||||||
|
counterMat.at<int>(0, 0) = 0;
|
||||||
|
counter.upload(counterMat);
|
||||||
|
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&counter));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&counter.data));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
|
cl_int stepBytes = map.step;
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&stepBytes));
|
||||||
|
cl_int offsetBytes = map.offset;
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&offsetBytes));
|
||||||
|
|
||||||
size_t globalThreads[3] = {cols, rows, 1};
|
size_t globalThreads[3] = {cols, rows, 1};
|
||||||
size_t localThreads[3] = {16, 16, 1};
|
size_t localThreads[3] = {16, 16, 1};
|
||||||
|
|
||||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
openCLExecuteKernel(clCxt, &imgproc_canny, "edgesHysteresisLocal", globalThreads, localThreads, args, -1, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols)
|
void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, oclMat& counter, int rows, int cols)
|
||||||
{
|
{
|
||||||
unsigned int count;
|
|
||||||
openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getClCommandQueuePtr(), (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL));
|
|
||||||
Context *clCxt = map.clCxt;
|
Context *clCxt = map.clCxt;
|
||||||
string kernelName = "edgesHysteresisGlobal";
|
|
||||||
vector< pair<size_t, const void *> > args;
|
vector< pair<size_t, const void *> > args;
|
||||||
size_t localThreads[3] = {128, 1, 1};
|
size_t localThreads[3] = {128, 1, 1};
|
||||||
|
|
||||||
int count_i[1] = {0};
|
while(1 > 0)
|
||||||
while(count > 0)
|
|
||||||
{
|
{
|
||||||
openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)getClCommandQueuePtr(), (cl_mem)counter, 1, 0, sizeof(int), &count_i, 0, NULL, NULL));
|
Mat counterMat; counter.download(counterMat);
|
||||||
|
int count = counterMat.at<int>(0, 0);
|
||||||
|
CV_Assert(count >= 0);
|
||||||
|
if (count == 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
counterMat.at<int>(0, 0) = 0;
|
||||||
|
counter.upload(counterMat);
|
||||||
|
|
||||||
args.clear();
|
args.clear();
|
||||||
size_t globalThreads[3] = {std::min(count, 65535u) * 128, divUp(count, 65535), 1};
|
size_t globalThreads[3] = {std::min((unsigned)count, 65535u) * 128, divUp(count, 65535), 1};
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&st2.data));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&st2.data));
|
||||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&counter));
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&counter.data));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&count));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&count));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
|
||||||
|
|
||||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
openCLExecuteKernel(clCxt, &imgproc_canny, "edgesHysteresisGlobal", globalThreads, localThreads, args, -1, -1);
|
||||||
openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getClCommandQueuePtr(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
|
|
||||||
std::swap(st1, st2);
|
std::swap(st1, st2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -374,6 +374,14 @@ calcMap
|
|||||||
#undef CANNY_SHIFT
|
#undef CANNY_SHIFT
|
||||||
#undef TG22
|
#undef TG22
|
||||||
|
|
||||||
|
struct PtrStepSz {
|
||||||
|
__global int *ptr;
|
||||||
|
int step;
|
||||||
|
int rows, cols;
|
||||||
|
};
|
||||||
|
inline int get(struct PtrStepSz data, int y, int x) { return *((__global int *)((__global char*)data.ptr + data.step * y + sizeof(int) * x)); }
|
||||||
|
inline void set(struct PtrStepSz data, int y, int x, int value) { *((__global int *)((__global char*)data.ptr + data.step * y + sizeof(int) * x)) = value; }
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// do Hysteresis for pixel whose edge type is 1
|
// do Hysteresis for pixel whose edge type is 1
|
||||||
//
|
//
|
||||||
@@ -390,7 +398,7 @@ void
|
|||||||
__attribute__((reqd_work_group_size(16,16,1)))
|
__attribute__((reqd_work_group_size(16,16,1)))
|
||||||
edgesHysteresisLocal
|
edgesHysteresisLocal
|
||||||
(
|
(
|
||||||
__global int * map,
|
__global int * map_ptr,
|
||||||
__global ushort2 * st,
|
__global ushort2 * st,
|
||||||
__global unsigned int * counter,
|
__global unsigned int * counter,
|
||||||
int rows,
|
int rows,
|
||||||
@@ -399,10 +407,11 @@ edgesHysteresisLocal
|
|||||||
int map_offset
|
int map_offset
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
|
#if 0
|
||||||
map_step /= sizeof(*map);
|
map_step /= sizeof(*map);
|
||||||
map_offset /= sizeof(*map);
|
map_offset /= sizeof(*map);
|
||||||
|
|
||||||
map += map_offset;
|
const __global int* map = map_ptr + map_offset;
|
||||||
|
|
||||||
__local int smem[18][18];
|
__local int smem[18][18];
|
||||||
|
|
||||||
@@ -482,6 +491,92 @@ edgesHysteresisLocal
|
|||||||
st[ind] = (ushort2)(gidx + 1, gidy + 1);
|
st[ind] = (ushort2)(gidx + 1, gidy + 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
struct PtrStepSz map = {((__global int *)((__global char*)map_ptr + map_offset)), map_step, rows, cols};
|
||||||
|
|
||||||
|
__local int smem[18][18];
|
||||||
|
|
||||||
|
int2 blockIdx = (int2)(get_group_id(0), get_group_id(1));
|
||||||
|
int2 blockDim = (int2)(get_local_size(0), get_local_size(1));
|
||||||
|
int2 threadIdx = (int2)(get_local_id(0), get_local_id(1));
|
||||||
|
|
||||||
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
|
|
||||||
|
smem[threadIdx.y + 1][threadIdx.x + 1] = x < map.cols && y < map.rows ? get(map, y, x) : 0;
|
||||||
|
if (threadIdx.y == 0)
|
||||||
|
smem[0][threadIdx.x + 1] = y > 0 ? get(map, y - 1, x) : 0;
|
||||||
|
if (threadIdx.y == blockDim.y - 1)
|
||||||
|
smem[blockDim.y + 1][threadIdx.x + 1] = y + 1 < map.rows ? get(map, y + 1, x) : 0;
|
||||||
|
if (threadIdx.x == 0)
|
||||||
|
smem[threadIdx.y + 1][0] = x > 0 ? get(map, y, x - 1) : 0;
|
||||||
|
if (threadIdx.x == blockDim.x - 1)
|
||||||
|
smem[threadIdx.y + 1][blockDim.x + 1] = x + 1 < map.cols ? get(map, y, x + 1) : 0;
|
||||||
|
if (threadIdx.x == 0 && threadIdx.y == 0)
|
||||||
|
smem[0][0] = y > 0 && x > 0 ? get(map, y - 1, x - 1) : 0;
|
||||||
|
if (threadIdx.x == blockDim.x - 1 && threadIdx.y == 0)
|
||||||
|
smem[0][blockDim.x + 1] = y > 0 && x + 1 < map.cols ? get(map, y - 1, x + 1) : 0;
|
||||||
|
if (threadIdx.x == 0 && threadIdx.y == blockDim.y - 1)
|
||||||
|
smem[blockDim.y + 1][0] = y + 1 < map.rows && x > 0 ? get(map, y + 1, x - 1) : 0;
|
||||||
|
if (threadIdx.x == blockDim.x - 1 && threadIdx.y == blockDim.y - 1)
|
||||||
|
smem[blockDim.y + 1][blockDim.x + 1] = y + 1 < map.rows && x + 1 < map.cols ? get(map, y + 1, x + 1) : 0;
|
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
if (x >= map.cols || y >= map.rows)
|
||||||
|
return;
|
||||||
|
|
||||||
|
int n;
|
||||||
|
|
||||||
|
#pragma unroll
|
||||||
|
for (int k = 0; k < 16; ++k)
|
||||||
|
{
|
||||||
|
n = 0;
|
||||||
|
|
||||||
|
if (smem[threadIdx.y + 1][threadIdx.x + 1] == 1)
|
||||||
|
{
|
||||||
|
n += smem[threadIdx.y ][threadIdx.x ] == 2;
|
||||||
|
n += smem[threadIdx.y ][threadIdx.x + 1] == 2;
|
||||||
|
n += smem[threadIdx.y ][threadIdx.x + 2] == 2;
|
||||||
|
|
||||||
|
n += smem[threadIdx.y + 1][threadIdx.x ] == 2;
|
||||||
|
n += smem[threadIdx.y + 1][threadIdx.x + 2] == 2;
|
||||||
|
|
||||||
|
n += smem[threadIdx.y + 2][threadIdx.x ] == 2;
|
||||||
|
n += smem[threadIdx.y + 2][threadIdx.x + 1] == 2;
|
||||||
|
n += smem[threadIdx.y + 2][threadIdx.x + 2] == 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (n > 0)
|
||||||
|
smem[threadIdx.y + 1][threadIdx.x + 1] = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int e = smem[threadIdx.y + 1][threadIdx.x + 1];
|
||||||
|
|
||||||
|
set(map, y, x, e);
|
||||||
|
|
||||||
|
n = 0;
|
||||||
|
|
||||||
|
if (e == 2)
|
||||||
|
{
|
||||||
|
n += smem[threadIdx.y ][threadIdx.x ] == 1;
|
||||||
|
n += smem[threadIdx.y ][threadIdx.x + 1] == 1;
|
||||||
|
n += smem[threadIdx.y ][threadIdx.x + 2] == 1;
|
||||||
|
|
||||||
|
n += smem[threadIdx.y + 1][threadIdx.x ] == 1;
|
||||||
|
n += smem[threadIdx.y + 1][threadIdx.x + 2] == 1;
|
||||||
|
|
||||||
|
n += smem[threadIdx.y + 2][threadIdx.x ] == 1;
|
||||||
|
n += smem[threadIdx.y + 2][threadIdx.x + 1] == 1;
|
||||||
|
n += smem[threadIdx.y + 2][threadIdx.x + 2] == 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (n > 0)
|
||||||
|
{
|
||||||
|
const int ind = atomic_inc(counter);
|
||||||
|
st[ind] = (ushort2)(x, y);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__constant int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1};
|
__constant int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1};
|
||||||
|
|||||||
Reference in New Issue
Block a user