// Patch summary (git unified diff; three files are touched).
//
// NOTE: the hunks below are stored with their original line breaks collapsed
// (hunk headers and +/- markers appear mid-line), so the patch text is kept
// verbatim here and is not directly appliable in this form.
//
// 1) modules/imgproc/src/hough.cpp -- ocl_HoughLines()
//    * The build options for the "make_point_list" kernel change from
//      "-D LOCAL_SIZE" to "-D LOCAL_SIZE=%d", so the second format() argument
//      (src.cols) is now consumed by the format string.
//    * fill_accum arguments are passed as plain values instead of
//      ocl::KernelArg::Constant(...) wrappers, theta is cast to float, and an
//      extra numangle argument is appended.
//    * The global work size is swapped: dimension 0 becomes group_size and
//      dimension 1 becomes numangle.
//    * Debug output is added: the top-left 8x8 corner of the accumulator and
//      its sum are printed with printf(); the function still ends with the
//      pre-existing `return false;` context line.
//    * NOTE(review): `ac.at(i, j)` is written without a template argument.
//      accum is created as CV_32SC1, so `ac.at<int>(i, j)` was presumably
//      intended -- confirm against the original patch.
//
// 2) modules/imgproc/src/opencl/hough_lines.cl
//    * make_point_list: the per-work-item `int offset` is replaced by a
//      `__local int l_offset`. Only the work-item with x == 0 assigns it (from
//      atomic_add on global_offset), and after the barrier every work-item
//      advances `list` by l_offset.
//    * fill_accum: gains an `int numangle` parameter; theta_idx now comes from
//      get_global_id(1) and count_idx from get_global_id(0); the point loop
//      strides by get_global_size(0) instead of GROUP_SIZE and is guarded by
//      `theta_idx < numangle`; the accumulator row pointer uses theta_idx + 1;
//      round() is replaced by convert_int_rte().
//
// 3) modules/imgproc/test/ocl/test_houghlines.cpp
//    * thetaStep changes from 0.1 to 0.5 and the random input image from
//      Size(100, 100) to Size(20, 10).
diff --git a/modules/imgproc/src/hough.cpp b/modules/imgproc/src/hough.cpp index dc27e2ace..17dbcc662 100644 --- a/modules/imgproc/src/hough.cpp +++ b/modules/imgproc/src/hough.cpp @@ -676,7 +676,7 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub const int pixelsPerWI = 4; int group_size = (src.cols + pixelsPerWI - 1)/pixelsPerWI; ocl::Kernel pointListKernel("make_point_list", ocl::imgproc::hough_lines_oclsrc, - format("-D MAKE_POINT_LIST -D GROUP_SIZE=%d -D LOCAL_SIZE", group_size, src.cols)); + format("-D MAKE_POINT_LIST -D GROUP_SIZE=%d -D LOCAL_SIZE=%d", group_size, src.cols)); if (pointListKernel.empty()) return false; @@ -703,13 +703,24 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub UMat accum(numangle + 2, numrho + 2, CV_32SC1, Scalar::all(0)); fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnly(accum), - ocl::KernelArg::Constant(&total_points, sizeof(int)), ocl::KernelArg::Constant(&irho, sizeof(float)), - ocl::KernelArg::Constant(&theta, sizeof(float)), ocl::KernelArg::Constant(&numrho, sizeof(int))); - globalThreads[0] = numangle; globalThreads[1] = group_size; + total_points, irho, (float) theta, numrho, numangle); + globalThreads[0] = group_size; globalThreads[1] = numangle; if (!fillAccumKernel.run(2, globalThreads, NULL, false)) return false; - + printf("GPU: \n"); + int sum = 0; + Mat ac = accum.getMat(ACCESS_READ); + for (int i=0; i<8; i++) + { + for (int j=0; j<8; j++) + { + sum += ac.at(i, j); + printf("%d ", ac.at(i, j)); + } + printf("\n"); + } + printf("sum = %d\n", sum); return false; } diff --git a/modules/imgproc/src/opencl/hough_lines.cl b/modules/imgproc/src/opencl/hough_lines.cl index 80d1604bd..756416309 100644 --- a/modules/imgproc/src/opencl/hough_lines.cl +++ b/modules/imgproc/src/opencl/hough_lines.cl @@ -13,7 +13,7 @@ __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int int x = get_local_id(0); 
int y = get_group_id(1); - __local int l_index; + __local int l_index, l_offset; __local int l_points[LOCAL_SIZE]; __global const uchar * src = src_ptr + mad24(y, src_step, src_offset); __global int * list = (__global int*)(list_ptr + list_offset); @@ -38,13 +38,12 @@ __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int barrier(CLK_LOCAL_MEM_FENCE); - int offset; if (x == 0) - offset = atomic_add(global_offset, l_index); + l_offset = atomic_add(global_offset, l_index); barrier(CLK_LOCAL_MEM_FENCE); - list += offset; + list += l_offset; for (int i=x; i < l_index; i+=GROUP_SIZE) { list[i] = l_points[i]; @@ -55,27 +54,31 @@ __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int __kernel void fill_accum(__global const uchar * list_ptr, int list_step, int list_offset, __global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols, - int count, float irho, float theta, int numrho) + int count, float irho, float theta, int numrho, int numangle) { - int theta_idx = get_global_id(0); - int count_idx = get_global_id(1); + int theta_idx = get_global_id(1); + int count_idx = get_global_id(0); + int glob_size = get_global_size(0); float cosVal; - float sinVal = sincos(theta * theta_idx, &cosVal); + float sinVal = sincos(theta * ((float)theta_idx), &cosVal); sinVal *= irho; cosVal *= irho; __global const int * list = (__global const int*)(list_ptr + list_offset); - __global int* accum = (__global int*)(accum_ptr + mad24(theta_idx, accum_step, accum_offset)); + __global int* accum = (__global int*)(accum_ptr + mad24(theta_idx + 1, accum_step, accum_offset)); const int shift = (numrho - 1) / 2; - for (int i = count_idx; i < count; i += GROUP_SIZE) + if (theta_idx < numangle) { - const int val = list[i]; - const int x = (val & 0xFFFF); - const int y = (val >> 16) & 0xFFFF; + for (int i = count_idx; i < count; i += glob_size) + { + const int val = list[i]; + const int x = (val & 0xFFFF); + const int y 
= (val >> 16) & 0xFFFF; - int r = round(x * cosVal + y * sinVal) + shift; - atomic_inc(accum + r + 1); + int r = convert_int_rte(x * cosVal + y * sinVal) + shift; + atomic_inc(accum + r + 1); + } } } diff --git a/modules/imgproc/test/ocl/test_houghlines.cpp b/modules/imgproc/test/ocl/test_houghlines.cpp index 80b07a0b7..00577b72b 100644 --- a/modules/imgproc/test/ocl/test_houghlines.cpp +++ b/modules/imgproc/test/ocl/test_houghlines.cpp @@ -26,7 +26,7 @@ PARAM_TEST_CASE(HoughLinesTestBase, bool) virtual void SetUp() { rhoStep = 10; - thetaStep = 0.1; + thetaStep = 0.5; threshold = 80; useRoi = false; } @@ -35,7 +35,7 @@ PARAM_TEST_CASE(HoughLinesTestBase, bool) { //Mat image = readImage("shared/pic1.png", IMREAD_GRAYSCALE); - Mat image = randomMat(Size(100, 100), CV_8UC1, 0, 255, false); + Mat image = randomMat(Size(20, 10), CV_8UC1, 0, 255, false); cv::threshold(image, src, 127, 255, THRESH_BINARY); //Canny(image, src, 100, 150, 3);