From 5c1f71de511271955b41f99f3a36510f6a2e19e5 Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Fri, 22 Aug 2014 16:50:01 +0400 Subject: [PATCH 1/9] Added make_point_list kernel --- modules/imgproc/src/hough.cpp | 64 +++++++++++++++ modules/imgproc/src/opencl/hough_lines.cl | 83 ++++++++++++++++++++ modules/imgproc/test/ocl/test_houghlines.cpp | 62 +++++++++++++++ 3 files changed, 209 insertions(+) create mode 100644 modules/imgproc/src/opencl/hough_lines.cl create mode 100644 modules/imgproc/test/ocl/test_houghlines.cpp diff --git a/modules/imgproc/src/hough.cpp b/modules/imgproc/src/hough.cpp index 5d5dde27d..dc27e2ace 100644 --- a/modules/imgproc/src/hough.cpp +++ b/modules/imgproc/src/hough.cpp @@ -42,6 +42,7 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels_imgproc.hpp" namespace cv { @@ -652,13 +653,76 @@ HoughLinesProbabilistic( Mat& image, } } + +static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, double theta, int threshold, + double min_theta, double max_theta) +{ + CV_Assert(_src.type() == CV_8UC1); + + if (max_theta < 0 || max_theta > CV_PI ) { + CV_Error( CV_StsBadArg, "max_theta must fall between 0 and pi" ); + } + if (min_theta < 0 || min_theta > max_theta ) { + CV_Error( CV_StsBadArg, "min_theta must fall between 0 and max_theta" ); + } + + UMat src = _src.getUMat(); + + float irho = 1 / rho; + int numangle = cvRound((max_theta - min_theta) / theta); + int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho); + + // make list of nonzero points + const int pixelsPerWI = 4; + int group_size = (src.cols + pixelsPerWI - 1)/pixelsPerWI; + ocl::Kernel pointListKernel("make_point_list", ocl::imgproc::hough_lines_oclsrc, + format("-D MAKE_POINT_LIST -D GROUP_SIZE=%d -D LOCAL_SIZE", group_size, src.cols)); + if (pointListKernel.empty()) + return false; + + UMat pointsList(1, src.total(), CV_32SC1); + UMat total(1, 1, CV_32SC1, Scalar::all(0)); + pointListKernel.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(pointsList), + ocl::KernelArg::PtrWriteOnly(total)); + size_t localThreads[2] = { group_size, 1 }; + size_t globalThreads[2] = { group_size, src.rows }; + + if (!pointListKernel.run(2, globalThreads, localThreads, false)) + return false; + + int total_points = total.getMat(ACCESS_READ).at(0, 0); + if (total_points <= 0) + return false; + + // convert src to hough space + group_size = (total_points + pixelsPerWI - 1)/pixelsPerWI; + ocl::Kernel fillAccumKernel("fill_accum", ocl::imgproc::hough_lines_oclsrc, + format("-D FILL_ACCUM -D GROUP_SIZE=%d", group_size)); + if (fillAccumKernel.empty()) + return false; + + UMat accum(numangle + 2, numrho + 2, CV_32SC1, Scalar::all(0)); + fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnly(accum), + ocl::KernelArg::Constant(&total_points, sizeof(int)), ocl::KernelArg::Constant(&irho, sizeof(float)), + ocl::KernelArg::Constant(&theta, sizeof(float)), ocl::KernelArg::Constant(&numrho, sizeof(int))); + globalThreads[0] = numangle; globalThreads[1] = group_size; + + if (!fillAccumKernel.run(2, globalThreads, NULL, false)) + return false; + + + return false; } +} void cv::HoughLines( InputArray _image, OutputArray _lines, double rho, double theta, int threshold, double srn, double stn, double min_theta, double max_theta ) { + CV_OCL_RUN(srn == 0 && stn == 0 && _lines.isUMat(), + ocl_HoughLines(_image, _lines, rho, theta, threshold, min_theta, max_theta)); + Mat image = _image.getMat(); std::vector lines; diff --git a/modules/imgproc/src/opencl/hough_lines.cl b/modules/imgproc/src/opencl/hough_lines.cl new file mode 100644 index 000000000..80d1604bd --- /dev/null +++ b/modules/imgproc/src/opencl/hough_lines.cl @@ -0,0 +1,83 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2014, Itseez, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. + +#ifdef MAKE_POINT_LIST + +__kernel void make_point_list(__global const uchar * src_ptr, int src_step, int src_offset, int src_rows, int src_cols, + __global uchar * list_ptr, int list_step, int list_offset, __global int* global_offset) +{ + int x = get_local_id(0); + int y = get_group_id(1); + + __local int l_index; + __local int l_points[LOCAL_SIZE]; + __global const uchar * src = src_ptr + mad24(y, src_step, src_offset); + __global int * list = (__global int*)(list_ptr + list_offset); + + if (x == 0) + l_index = 0; + + barrier(CLK_LOCAL_MEM_FENCE); + + if (y < src_rows) + { + for (int i=x; i < src_cols; i+=GROUP_SIZE) + { + if (src[i]) + { + int val = (y << 16) | i; + int index = atomic_inc(&l_index); + l_points[index] = val; + } + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + + int offset; + if (x == 0) + offset = atomic_add(global_offset, l_index); + + barrier(CLK_LOCAL_MEM_FENCE); + + list += offset; + for (int i=x; i < l_index; i+=GROUP_SIZE) + { + list[i] = l_points[i]; + } +} + +#elif defined FILL_ACCUM + +__kernel void fill_accum(__global const uchar * list_ptr, int list_step, int list_offset, + __global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols, + int count, float irho, float theta, int numrho) +{ + int theta_idx = get_global_id(0); + int count_idx = get_global_id(1); + float cosVal; + float sinVal = sincos(theta * theta_idx, &cosVal); + sinVal *= irho; + cosVal *= irho; + + __global const int * list = (__global const int*)(list_ptr + list_offset); + __global int* accum = (__global int*)(accum_ptr + mad24(theta_idx, accum_step, accum_offset)); + const int shift = (numrho - 1) / 2; + + for (int i = count_idx; i < count; i += GROUP_SIZE) + { + const int val = list[i]; + const int x = (val & 0xFFFF); + const int y = (val >> 16) & 0xFFFF; + + int r = round(x * cosVal + y * sinVal) + shift; + atomic_inc(accum + r + 1); + } +} + +#endif + diff --git a/modules/imgproc/test/ocl/test_houghlines.cpp b/modules/imgproc/test/ocl/test_houghlines.cpp new file mode 100644 index 000000000..80b07a0b7 --- /dev/null +++ b/modules/imgproc/test/ocl/test_houghlines.cpp @@ -0,0 +1,62 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2014, Itseez, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. + +#include "../test_precomp.hpp" +#include "opencv2/ts/ocl_test.hpp" + +#ifdef HAVE_OPENCL + +namespace cvtest { +namespace ocl { + +PARAM_TEST_CASE(HoughLinesTestBase, bool) +{ + double rhoStep; + double thetaStep; + int threshold; + bool useRoi; + + Mat src, dst; + UMat usrc, udst; + + virtual void SetUp() + { + rhoStep = 10; + thetaStep = 0.1; + threshold = 80; + useRoi = false; + } + + virtual void generateTestData() + { + //Mat image = readImage("shared/pic1.png", IMREAD_GRAYSCALE); + + Mat image = randomMat(Size(100, 100), CV_8UC1, 0, 255, false); + + cv::threshold(image, src, 127, 255, THRESH_BINARY); + //Canny(image, src, 100, 150, 3); + src.copyTo(usrc); + } +}; + +typedef HoughLinesTestBase HoughLines; + +OCL_TEST_P(HoughLines, RealImage) +{ + generateTestData(); + + //std::cout << src << std::endl; + + OCL_OFF(cv::HoughLines(src, dst, rhoStep, thetaStep, threshold, 0, 0)); + OCL_ON(cv::HoughLines(usrc, udst, rhoStep, thetaStep, threshold, 0, 0)); +} + +OCL_INSTANTIATE_TEST_CASE_P(Imgproc, HoughLines, Values(true, false)); + +} } // namespace cvtest::ocl + +#endif // HAVE_OPENCL \ No newline at end of file From 038bfb98ec85b5e260255a4e55378d5c34c92790 Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Mon, 25 Aug 2014 13:55:09 +0400 Subject: [PATCH 2/9] Added fill_accum kernel --- modules/imgproc/src/hough.cpp | 21 ++++++++++--- modules/imgproc/src/opencl/hough_lines.cl | 33 +++++++++++--------- modules/imgproc/test/ocl/test_houghlines.cpp | 4 +-- 3 files changed, 36 insertions(+), 22 deletions(-) diff --git a/modules/imgproc/src/hough.cpp b/modules/imgproc/src/hough.cpp index dc27e2ace..17dbcc662 100644 --- a/modules/imgproc/src/hough.cpp +++ b/modules/imgproc/src/hough.cpp @@ -676,7 +676,7 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub const int pixelsPerWI = 4; int group_size = (src.cols + pixelsPerWI - 1)/pixelsPerWI; ocl::Kernel pointListKernel("make_point_list", ocl::imgproc::hough_lines_oclsrc, - format("-D MAKE_POINT_LIST -D GROUP_SIZE=%d -D LOCAL_SIZE", group_size, src.cols)); + format("-D MAKE_POINT_LIST -D GROUP_SIZE=%d -D LOCAL_SIZE=%d", group_size, src.cols)); if (pointListKernel.empty()) return false; @@ -703,13 +703,24 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub UMat accum(numangle + 2, numrho + 2, CV_32SC1, Scalar::all(0)); fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnly(accum), - ocl::KernelArg::Constant(&total_points, sizeof(int)), ocl::KernelArg::Constant(&irho, sizeof(float)), - ocl::KernelArg::Constant(&theta, sizeof(float)), ocl::KernelArg::Constant(&numrho, sizeof(int))); - globalThreads[0] = numangle; globalThreads[1] = group_size; + total_points, irho, (float) theta, numrho, numangle); + globalThreads[0] = group_size; globalThreads[1] = numangle; if (!fillAccumKernel.run(2, globalThreads, NULL, false)) return false; - + printf("GPU: \n"); + int sum = 0; + Mat ac = accum.getMat(ACCESS_READ); + for (int i=0; i<8; i++) + { + for (int j=0; j<8; j++) + { + sum += ac.at(i, j); + printf("%d ", ac.at(i, j)); + } + printf("\n"); + } + printf("sum = %d\n", sum); return false; } diff --git a/modules/imgproc/src/opencl/hough_lines.cl b/modules/imgproc/src/opencl/hough_lines.cl index 80d1604bd..756416309 100644 --- a/modules/imgproc/src/opencl/hough_lines.cl +++ b/modules/imgproc/src/opencl/hough_lines.cl @@ -13,7 +13,7 @@ __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int int x = get_local_id(0); int y = get_group_id(1); - __local int l_index; + __local int l_index, l_offset; __local int l_points[LOCAL_SIZE]; __global const uchar * src = src_ptr + mad24(y, src_step, src_offset); __global int * list = (__global int*)(list_ptr + list_offset); @@ -38,13 +38,12 @@ __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int barrier(CLK_LOCAL_MEM_FENCE); - int offset; if (x == 0) - offset = atomic_add(global_offset, l_index); + l_offset = atomic_add(global_offset, l_index); barrier(CLK_LOCAL_MEM_FENCE); - list += offset; + list += l_offset; for (int i=x; i < l_index; i+=GROUP_SIZE) { list[i] = l_points[i]; @@ -55,27 +54,31 @@ __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int __kernel void fill_accum(__global const uchar * list_ptr, int list_step, int list_offset, __global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols, - int count, float irho, float theta, int numrho) + int count, float irho, float theta, int numrho, int numangle) { - int theta_idx = get_global_id(0); - int count_idx = get_global_id(1); + int theta_idx = get_global_id(1); + int count_idx = get_global_id(0); + int glob_size = get_global_size(0); float cosVal; - float sinVal = sincos(theta * theta_idx, &cosVal); + float sinVal = sincos(theta * ((float)theta_idx), &cosVal); sinVal *= irho; cosVal *= irho; __global const int * list = (__global const int*)(list_ptr + list_offset); - __global int* accum = (__global int*)(accum_ptr + mad24(theta_idx, accum_step, accum_offset)); + __global int* accum = (__global int*)(accum_ptr + mad24(theta_idx + 1, accum_step, accum_offset)); const int shift = (numrho - 1) / 2; - for (int i = count_idx; i < count; i += GROUP_SIZE) + if (theta_idx < numangle) { - const int val = list[i]; - const int x = (val & 0xFFFF); - const int y = (val >> 16) & 0xFFFF; + for (int i = count_idx; i < count; i += glob_size) + { + const int val = list[i]; + const int x = (val & 0xFFFF); + const int y = (val >> 16) & 0xFFFF; - int r = round(x * cosVal + y * sinVal) + shift; - atomic_inc(accum + r + 1); + int r = convert_int_rte(x * cosVal + y * sinVal) + shift; + atomic_inc(accum + r + 1); + } } } diff --git a/modules/imgproc/test/ocl/test_houghlines.cpp b/modules/imgproc/test/ocl/test_houghlines.cpp index 80b07a0b7..00577b72b 100644 --- a/modules/imgproc/test/ocl/test_houghlines.cpp +++ b/modules/imgproc/test/ocl/test_houghlines.cpp @@ -26,7 +26,7 @@ PARAM_TEST_CASE(HoughLinesTestBase, bool) virtual void SetUp() { rhoStep = 10; - thetaStep = 0.1; + thetaStep = 0.5; threshold = 80; useRoi = false; } @@ -35,7 +35,7 @@ PARAM_TEST_CASE(HoughLinesTestBase, bool) { //Mat image = readImage("shared/pic1.png", IMREAD_GRAYSCALE); - Mat image = randomMat(Size(100, 100), CV_8UC1, 0, 255, false); + Mat image = randomMat(Size(20, 10), CV_8UC1, 0, 255, false); cv::threshold(image, src, 127, 255, THRESH_BINARY); //Canny(image, src, 100, 150, 3); From f7aadd07f66fa5096bfce6580a11d4f8b3181ac8 Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Mon, 25 Aug 2014 15:57:58 +0400 Subject: [PATCH 3/9] Added getLines, fill_accum_local kernels --- modules/cudaimgproc/src/generalized_hough.cpp | 5 +- modules/imgproc/src/hough.cpp | 76 +++++++++++------ modules/imgproc/src/opencl/hough_lines.cl | 81 ++++++++++++++++++- modules/imgproc/test/ocl/test_houghlines.cpp | 51 ++++++++---- 4 files changed, 169 insertions(+), 44 deletions(-) diff --git a/modules/cudaimgproc/src/generalized_hough.cpp b/modules/cudaimgproc/src/generalized_hough.cpp index d68b76e62..9810bed3a 100644 --- a/modules/cudaimgproc/src/generalized_hough.cpp +++ b/modules/cudaimgproc/src/generalized_hough.cpp @@ -239,8 +239,9 @@ namespace void GeneralizedHoughBase::detectImpl(InputArray image, OutputArray positions, OutputArray votes) { #ifndef HAVE_OPENCV_CUDAFILTERS - (void) templ; - (void) templCenter; + (void) image; + (void) positions; + (void) votes; throw_no_cuda(); #else calcEdges(image, imageEdges_, imageDx_, imageDy_); diff --git a/modules/imgproc/src/hough.cpp b/modules/imgproc/src/hough.cpp index 17dbcc662..d0440d231 100644 --- a/modules/imgproc/src/hough.cpp +++ b/modules/imgproc/src/hough.cpp @@ -668,9 +668,10 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub UMat src = _src.getUMat(); - float irho = 1 / rho; + float irho = (float) (1 / rho); int numangle = cvRound((max_theta - min_theta) / theta); int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho); + ocl::Device dev = ocl::Device::getDefault(); // make list of nonzero points const int pixelsPerWI = 4; @@ -680,7 +681,7 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub if (pointListKernel.empty()) return false; - UMat pointsList(1, src.total(), CV_32SC1); + UMat pointsList(1, (int) src.total(), CV_32SC1); UMat total(1, 1, CV_32SC1, Scalar::all(0)); pointListKernel.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(pointsList), ocl::KernelArg::PtrWriteOnly(total)); @@ -692,37 +693,66 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub int total_points = total.getMat(ACCESS_READ).at(0, 0); if (total_points <= 0) - return false; + { + _lines.assign(UMat(0,0,CV_32FC2)); + return true; + } // convert src to hough space - group_size = (total_points + pixelsPerWI - 1)/pixelsPerWI; - ocl::Kernel fillAccumKernel("fill_accum", ocl::imgproc::hough_lines_oclsrc, - format("-D FILL_ACCUM -D GROUP_SIZE=%d", group_size)); + group_size = min((int) dev.maxWorkGroupSize(), total_points); + int local_memory_needed = (numrho + 2)*sizeof(int); + ocl::Kernel fillAccumKernel; + globalThreads[0] = group_size; globalThreads[1] = numangle; + size_t* fillAccumLT = NULL; + + UMat accum(numangle + 2, numrho + 2, CV_32SC1); + + if (local_memory_needed > dev.localMemSize()) + { + fillAccumKernel.create("fill_accum_global", ocl::imgproc::hough_lines_oclsrc, + format("-D FILL_ACCUM_GLOBAL")); + accum.setTo(Scalar::all(0)); + } + else + { + fillAccumKernel.create("fill_accum_local", ocl::imgproc::hough_lines_oclsrc, + format("-D FILL_ACCUM_LOCAL -D LOCAL_SIZE=%d -D BUFFER_SIZE=%d", group_size, numrho + 2)); + localThreads[0] = group_size; localThreads[1] = 1; + fillAccumLT = localThreads; + } if (fillAccumKernel.empty()) return false; - UMat accum(numangle + 2, numrho + 2, CV_32SC1, Scalar::all(0)); + int linesMax = min(total_points*numangle/threshold, 4096); + UMat lines(linesMax, 1, CV_32FC2); + UMat lines_count(1, 1, CV_32SC1, Scalar::all(0)); + fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnly(accum), total_points, irho, (float) theta, numrho, numangle); - globalThreads[0] = group_size; globalThreads[1] = numangle; - if (!fillAccumKernel.run(2, globalThreads, NULL, false)) + + if (!fillAccumKernel.run(2, globalThreads, fillAccumLT, false)) return false; - printf("GPU: \n"); - int sum = 0; - Mat ac = accum.getMat(ACCESS_READ); - for (int i=0; i<8; i++) - { - for (int j=0; j<8; j++) - { - sum += ac.at(i, j); - printf("%d ", ac.at(i, j)); - } - printf("\n"); - } - printf("sum = %d\n", sum); - return false; + ocl::Kernel getLinesKernel("get_lines", ocl::imgproc::hough_lines_oclsrc, + format("-D GET_LINES")); + if (getLinesKernel.empty()) + return false; + + globalThreads[0] = numrho; globalThreads[1] = numangle; + getLinesKernel.args(ocl::KernelArg::ReadOnly(accum), ocl::KernelArg::WriteOnlyNoSize(lines), + ocl::KernelArg::PtrWriteOnly(lines_count), linesMax, threshold, (float) rho, (float) theta); + + if (!getLinesKernel.run(2, globalThreads, NULL, false)) + return false; + + + int total_lines = min(lines_count.getMat(ACCESS_READ).at(0, 0), linesMax); + if (total_lines > 0) + _lines.assign(lines.rowRange(Range(0, total_lines))); + else + _lines.assign(UMat(0,0,CV_32FC2)); + return true; } } diff --git a/modules/imgproc/src/opencl/hough_lines.cl b/modules/imgproc/src/opencl/hough_lines.cl index 756416309..2b8311567 100644 --- a/modules/imgproc/src/opencl/hough_lines.cl +++ b/modules/imgproc/src/opencl/hough_lines.cl @@ -50,9 +50,9 @@ __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int } } -#elif defined FILL_ACCUM +#elif defined FILL_ACCUM_GLOBAL -__kernel void fill_accum(__global const uchar * list_ptr, int list_step, int list_offset, +__kernel void fill_accum_global(__global const uchar * list_ptr, int list_step, int list_offset, __global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols, int count, float irho, float theta, int numrho, int numangle) { @@ -82,5 +82,82 @@ __kernel void fill_accum(__global const uchar * list_ptr, int list_step, int lis } } +#elif defined FILL_ACCUM_LOCAL + +__kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, int list_offset, + __global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols, + int count, float irho, float theta, int numrho, int numangle) +{ + int theta_idx = get_global_id(1); + int count_idx = get_local_id(0); + + float cosVal; + float sinVal = sincos(theta * ((float)theta_idx), &cosVal); + sinVal *= irho; + cosVal *= irho; + + __local int l_accum[BUFFER_SIZE]; + for (int i=count_idx; i> 16) & 0xFFFF; + + int r = convert_int_rte(x * cosVal + y * sinVal) + shift; + atomic_inc(l_accum + r + 1); + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + + __global int* accum = (__global int*)(accum_ptr + mad24(theta_idx + 1, accum_step, accum_offset)); + for (int i=count_idx; i threshold && curVote > ACCUM(accum - sizeof(int)) && curVote >= ACCUM(accum + sizeof(int)) && + curVote > ACCUM(accum - accum_step) && curVote >= ACCUM(accum + accum_step)) + { + int index = atomic_inc(lines_index); + + if (index < linesMax) + { + float radius = (x - (accum_cols - 3) * 0.5f) * rho; + float angle = y * theta; + + lines[index] = (float2)(radius, angle); + } + } + } +} + #endif diff --git a/modules/imgproc/test/ocl/test_houghlines.cpp b/modules/imgproc/test/ocl/test_houghlines.cpp index 00577b72b..2d78c0403 100644 --- a/modules/imgproc/test/ocl/test_houghlines.cpp +++ b/modules/imgproc/test/ocl/test_houghlines.cpp @@ -13,49 +13,66 @@ namespace cvtest { namespace ocl { -PARAM_TEST_CASE(HoughLinesTestBase, bool) +struct Vec2fComparator +{ + bool operator()(const cv::Vec2f& a, const cv::Vec2f b) const + { + if(a[0] != b[0]) return a[0] < b[0]; + else return a[1] < b[1]; + } +}; + +PARAM_TEST_CASE(HoughLinesTestBase, double, double, int) { double rhoStep; double thetaStep; int threshold; - bool useRoi; + Size src_size; Mat src, dst; UMat usrc, udst; virtual void SetUp() { - rhoStep = 10; - thetaStep = 0.5; - threshold = 80; - useRoi = false; + rhoStep = GET_PARAM(0); + thetaStep = GET_PARAM(1); + threshold = GET_PARAM(2); } virtual void generateTestData() { - //Mat image = readImage("shared/pic1.png", IMREAD_GRAYSCALE); + src_size = randomSize(500, 1000); + src.create(src_size, CV_8UC1); + src.setTo(Scalar::all(0)); + line(src, Point(0, 100), Point(100, 100), Scalar::all(255), 1); + line(src, Point(0, 200), Point(100, 200), Scalar::all(255), 1); + line(src, Point(0, 400), Point(100, 400), Scalar::all(255), 1); + line(src, Point(100, 0), Point(100, 200), Scalar::all(255), 1); + line(src, Point(200, 0), Point(200, 200), Scalar::all(255), 1); + line(src, Point(400, 0), Point(400, 200), Scalar::all(255), 1); - Mat image = randomMat(Size(20, 10), CV_8UC1, 0, 255, false); - - cv::threshold(image, src, 127, 255, THRESH_BINARY); - //Canny(image, src, 100, 150, 3); src.copyTo(usrc); } }; typedef HoughLinesTestBase HoughLines; -OCL_TEST_P(HoughLines, RealImage) +OCL_TEST_P(HoughLines, GeneratedImage) { - generateTestData(); + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(); - //std::cout << src << std::endl; + OCL_OFF(cv::HoughLines(src, dst, rhoStep, thetaStep, threshold)); + OCL_ON(cv::HoughLines(usrc, udst, rhoStep, thetaStep, threshold)); - OCL_OFF(cv::HoughLines(src, dst, rhoStep, thetaStep, threshold, 0, 0)); - OCL_ON(cv::HoughLines(usrc, udst, rhoStep, thetaStep, threshold, 0, 0)); + //Near(1e-5); + } } -OCL_INSTANTIATE_TEST_CASE_P(Imgproc, HoughLines, Values(true, false)); +OCL_INSTANTIATE_TEST_CASE_P(Imgproc, HoughLines, Combine(Values(1, 0.5), // rhoStep + Values(CV_PI / 180.0, CV_PI / 360.0), // thetaStep + Values(80, 150))); // threshold } } // namespace cvtest::ocl From 6b6c7ccfea4a8b83e37983ff796e8296b8e9cd50 Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Wed, 27 Aug 2014 17:58:48 +0400 Subject: [PATCH 4/9] Added accuracy tests with real and generated data --- modules/imgproc/test/ocl/test_houghlines.cpp | 35 ++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/modules/imgproc/test/ocl/test_houghlines.cpp b/modules/imgproc/test/ocl/test_houghlines.cpp index 2d78c0403..ad27e1078 100644 --- a/modules/imgproc/test/ocl/test_houghlines.cpp +++ b/modules/imgproc/test/ocl/test_houghlines.cpp @@ -53,10 +53,45 @@ PARAM_TEST_CASE(HoughLinesTestBase, double, double, int) src.copyTo(usrc); } + + virtual void readRealTestData() + { + Mat img = readImage("shared/pic5.png", IMREAD_GRAYSCALE); + Canny(img, src, 100, 150, 3); + + src.copyTo(usrc); + } + + virtual void Near(double eps = 0.) + { + EXPECT_EQ(dst.size(), udst.size()); + + if (dst.total() > 0) + { + Mat lines_cpu, lines_gpu; + dst.copyTo(lines_cpu); + udst.copyTo(lines_gpu); + + std::sort(lines_cpu.begin(), lines_cpu.end(), Vec2fComparator()); + std::sort(lines_gpu.begin(), lines_gpu.end(), Vec2fComparator()); + + EXPECT_LE(TestUtils::checkNorm2(lines_cpu, lines_gpu), eps); + } + } }; typedef HoughLinesTestBase HoughLines; +OCL_TEST_P(HoughLines, RealImage) +{ + readRealTestData(); + + OCL_OFF(cv::HoughLines(src, dst, rhoStep, thetaStep, threshold)); + OCL_ON(cv::HoughLines(usrc, udst, rhoStep, thetaStep, threshold)); + + Near(1e-5); +} + OCL_TEST_P(HoughLines, GeneratedImage) { for (int j = 0; j < test_loop_times; j++) From fee8f29f485891251223ebe3683008d8318d505b Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Thu, 4 Sep 2014 16:31:30 +0400 Subject: [PATCH 5/9] Refactoring, minor optimization --- modules/imgproc/src/hough.cpp | 55 ++++++++------- modules/imgproc/src/opencl/hough_lines.cl | 70 +++++++++++--------- modules/imgproc/test/ocl/test_houghlines.cpp | 4 +- 3 files changed, 67 insertions(+), 62 deletions(-) diff --git a/modules/imgproc/src/hough.cpp b/modules/imgproc/src/hough.cpp index d0440d231..37ca4729c 100644 --- a/modules/imgproc/src/hough.cpp +++ b/modules/imgproc/src/hough.cpp @@ -674,63 +674,57 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub ocl::Device dev = ocl::Device::getDefault(); // make list of nonzero points - const int pixelsPerWI = 4; - int group_size = (src.cols + pixelsPerWI - 1)/pixelsPerWI; + const int pixelsPerWI = 8; + int workgroup_size = min((int) dev.maxWorkGroupSize(), (src.cols + pixelsPerWI - 1)/pixelsPerWI); ocl::Kernel pointListKernel("make_point_list", ocl::imgproc::hough_lines_oclsrc, - format("-D MAKE_POINT_LIST -D GROUP_SIZE=%d -D LOCAL_SIZE=%d", group_size, src.cols)); + format("-D MAKE_POINTS_LIST -D GROUP_SIZE=%d -D LOCAL_SIZE=%d", workgroup_size, src.cols)); if (pointListKernel.empty()) return false; UMat pointsList(1, (int) src.total(), CV_32SC1); - UMat total(1, 1, CV_32SC1, Scalar::all(0)); + UMat counters(1, 2, CV_32SC1, Scalar::all(0)); pointListKernel.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(pointsList), - ocl::KernelArg::PtrWriteOnly(total)); - size_t localThreads[2] = { group_size, 1 }; - size_t globalThreads[2] = { group_size, src.rows }; + ocl::KernelArg::PtrWriteOnly(counters)); + size_t localThreads[2] = { workgroup_size, 1 }; + size_t globalThreads[2] = { workgroup_size, src.rows }; if (!pointListKernel.run(2, globalThreads, localThreads, false)) return false; - int total_points = total.getMat(ACCESS_READ).at(0, 0); + int total_points = counters.getMat(ACCESS_READ).at(0, 0); if (total_points <= 0) { _lines.assign(UMat(0,0,CV_32FC2)); return true; } - // convert src to hough space - group_size = min((int) dev.maxWorkGroupSize(), total_points); - int local_memory_needed = (numrho + 2)*sizeof(int); - ocl::Kernel fillAccumKernel; - globalThreads[0] = group_size; globalThreads[1] = numangle; - size_t* fillAccumLT = NULL; - + // convert src image to hough space UMat accum(numangle + 2, numrho + 2, CV_32SC1); - + workgroup_size = min((int) dev.maxWorkGroupSize(), total_points); + ocl::Kernel fillAccumKernel; + size_t* fillAccumLT = NULL; + int local_memory_needed = (numrho + 2)*sizeof(int); if (local_memory_needed > dev.localMemSize()) { + accum.setTo(Scalar::all(0)); fillAccumKernel.create("fill_accum_global", ocl::imgproc::hough_lines_oclsrc, format("-D FILL_ACCUM_GLOBAL")); - accum.setTo(Scalar::all(0)); + globalThreads[0] = workgroup_size; globalThreads[1] = numangle; } else { fillAccumKernel.create("fill_accum_local", ocl::imgproc::hough_lines_oclsrc, - format("-D FILL_ACCUM_LOCAL -D LOCAL_SIZE=%d -D BUFFER_SIZE=%d", group_size, numrho + 2)); - localThreads[0] = group_size; localThreads[1] = 1; + format("-D FILL_ACCUM_LOCAL -D LOCAL_SIZE=%d -D BUFFER_SIZE=%d", workgroup_size, numrho + 2)); + localThreads[0] = workgroup_size; localThreads[1] = 1; + globalThreads[0] = workgroup_size; globalThreads[1] = numangle+2; fillAccumLT = localThreads; } if (fillAccumKernel.empty()) return false; - int linesMax = min(total_points*numangle/threshold, 4096); - UMat lines(linesMax, 1, CV_32FC2); - UMat lines_count(1, 1, CV_32SC1, Scalar::all(0)); - fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnly(accum), total_points, irho, (float) theta, numrho, numangle); - if (!fillAccumKernel.run(2, globalThreads, fillAccumLT, false)) return false; @@ -739,15 +733,18 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub if (getLinesKernel.empty()) return false; - globalThreads[0] = numrho; globalThreads[1] = numangle; - getLinesKernel.args(ocl::KernelArg::ReadOnly(accum), ocl::KernelArg::WriteOnlyNoSize(lines), - ocl::KernelArg::PtrWriteOnly(lines_count), linesMax, threshold, (float) rho, (float) theta); + // TODO: investigate other strategies to choose linesMax + int linesMax = min(total_points*numangle/threshold, 4096); + UMat lines(linesMax, 1, CV_32FC2); + getLinesKernel.args(ocl::KernelArg::ReadOnly(accum), ocl::KernelArg::WriteOnlyNoSize(lines), + ocl::KernelArg::PtrWriteOnly(counters), linesMax, threshold, (float) rho, (float) theta); + + globalThreads[0] = numrho; globalThreads[1] = numangle; if (!getLinesKernel.run(2, globalThreads, NULL, false)) return false; - - int total_lines = min(lines_count.getMat(ACCESS_READ).at(0, 0), linesMax); + int total_lines = min(counters.getMat(ACCESS_READ).at(0, 1), linesMax); if (total_lines > 0) _lines.assign(lines.rowRange(Range(0, total_lines))); else diff --git a/modules/imgproc/src/opencl/hough_lines.cl b/modules/imgproc/src/opencl/hough_lines.cl index 2b8311567..40f537201 100644 --- a/modules/imgproc/src/opencl/hough_lines.cl +++ b/modules/imgproc/src/opencl/hough_lines.cl @@ -5,7 +5,7 @@ // Copyright (C) 2014, Itseez, Inc., all rights reserved. // Third party copyrights are property of their respective owners. -#ifdef MAKE_POINT_LIST +#ifdef MAKE_POINTS_LIST __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int src_offset, int src_rows, int src_cols, __global uchar * list_ptr, int list_step, int list_offset, __global int* global_offset) @@ -54,7 +54,7 @@ __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int __kernel void fill_accum_global(__global const uchar * list_ptr, int list_step, int list_offset, __global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols, - int count, float irho, float theta, int numrho, int numangle) + int total_points, float irho, float theta, int numrho, int numangle) { int theta_idx = get_global_id(1); int count_idx = get_global_id(0); @@ -70,7 +70,7 @@ __kernel void fill_accum_global(__global const uchar * list_ptr, int list_step, if (theta_idx < numangle) { - for (int i = count_idx; i < count; i += glob_size) + for (int i = count_idx; i < total_points; i += glob_size) { const int val = list[i]; const int x = (val & 0xFFFF); @@ -86,43 +86,50 @@ __kernel void fill_accum_global(__global const uchar * list_ptr, int list_step, __kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, int list_offset, __global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols, - int count, float irho, float theta, int numrho, int numangle) + int total_points, float irho, float theta, int numrho, int numangle) { int theta_idx = get_global_id(1); int count_idx = get_local_id(0); - - float cosVal; - float sinVal = sincos(theta * ((float)theta_idx), &cosVal); - sinVal *= irho; - cosVal *= irho; - - __local int l_accum[BUFFER_SIZE]; - for (int i=count_idx; i 0 && theta_idx < numangle + 1) { - for (int i = count_idx; i < count; i += LOCAL_SIZE) + float cosVal; + float sinVal = sincos(theta * (float) (theta_idx-1), &cosVal); + sinVal *= irho; + cosVal *= irho; + + __local int l_accum[BUFFER_SIZE]; + for (int i=count_idx; i> 16) & 0xFFFF; + const int point = list[i]; + const int x = (point & 0xFFFF); + const int y = (point >> 16) & 0xFFFF; int r = convert_int_rte(x * cosVal + y * sinVal) + shift; atomic_inc(l_accum + r + 1); } + + barrier(CLK_LOCAL_MEM_FENCE); + + __global int* accum = (__global int*)(accum_ptr + mad24(theta_idx, accum_step, accum_offset)); + for (int i=count_idx; i Date: Fri, 5 Sep 2014 11:37:16 +0400 Subject: [PATCH 6/9] Optimization for getLines --- modules/imgproc/src/hough.cpp | 3 +- modules/imgproc/src/opencl/hough_lines.cl | 35 +++++++++++++---------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/modules/imgproc/src/hough.cpp b/modules/imgproc/src/hough.cpp index 37ca4729c..29ca551fb 100644 --- a/modules/imgproc/src/hough.cpp +++ b/modules/imgproc/src/hough.cpp @@ -728,6 +728,7 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub if (!fillAccumKernel.run(2, globalThreads, fillAccumLT, false)) return false; + const int pixPerWI = 8; ocl::Kernel getLinesKernel("get_lines", ocl::imgproc::hough_lines_oclsrc, format("-D GET_LINES")); if (getLinesKernel.empty()) @@ -740,7 +741,7 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub getLinesKernel.args(ocl::KernelArg::ReadOnly(accum), ocl::KernelArg::WriteOnlyNoSize(lines), ocl::KernelArg::PtrWriteOnly(counters), linesMax, threshold, (float) rho, (float) theta); - globalThreads[0] = numrho; globalThreads[1] = numangle; + globalThreads[0] = (numrho + pixPerWI - 1)/pixPerWI; globalThreads[1] = numangle; if (!getLinesKernel.run(2, globalThreads, NULL, false)) return false; diff --git a/modules/imgproc/src/opencl/hough_lines.cl b/modules/imgproc/src/opencl/hough_lines.cl index 40f537201..25ebe4708 100644 --- a/modules/imgproc/src/opencl/hough_lines.cl +++ b/modules/imgproc/src/opencl/hough_lines.cl @@ -88,7 +88,7 @@ __kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, i __global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols, int total_points, float irho, float theta, int numrho, int numangle) { - int theta_idx = get_global_id(1); + int theta_idx = get_group_id(1); int count_idx = get_local_id(0); if (theta_idx > 0 && theta_idx < numangle + 1) @@ -140,32 +140,37 @@ __kernel void get_lines(__global uchar * accum_ptr, int accum_step, int accum_of __global uchar * lines_ptr, int lines_step, int lines_offset, __global int* lines_index_ptr, int linesMax, int threshold, float rho, float theta) { - int x = get_global_id(0); + int x0 = get_global_id(0); int y = get_global_id(1); + int gl_size = get_global_size(0); - if (x < accum_cols-2 && y < accum_rows-2) + if (y < accum_rows-2) { - __global uchar* accum = accum_ptr + mad24(y+1, accum_step, mad24(x+1, (int) sizeof(int), accum_offset)); + __global uchar* accum = accum_ptr + mad24(y+1, accum_step, mad24(x0+1, (int) sizeof(int), accum_offset)); __global float2* lines = (__global float2*)(lines_ptr + lines_offset); __global int* lines_index = lines_index_ptr + 1; - int curVote = ACCUM(accum); - - if (curVote > threshold && curVote > ACCUM(accum - sizeof(int)) && curVote >= ACCUM(accum + sizeof(int)) && - curVote > ACCUM(accum - accum_step) && curVote >= ACCUM(accum + accum_step)) + for (int x=x0; x threshold && curVote > ACCUM(accum - sizeof(int)) && curVote >= ACCUM(accum + sizeof(int)) && + curVote > ACCUM(accum - accum_step) && curVote >= ACCUM(accum + accum_step)) { - float radius = (x - (accum_cols - 3) * 0.5f) * rho; - float angle = y * theta; + int index = atomic_inc(lines_index); - lines[index] = (float2)(radius, angle); + if (index < linesMax) + { + float radius = (x - (accum_cols - 3) * 0.5f) * rho; + float angle = y * theta; + + lines[index] = (float2)(radius, angle); + } } + + accum += gl_size * (int) sizeof(int); } } } -#endif - +#endif \ No newline at end of file From 39b27a19bec6dc567bb246092a9ed522e2cf67c2 Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Fri, 5 Sep 2014 12:20:29 +0400 Subject: [PATCH 7/9] Refactoring and optimization --- modules/imgproc/src/hough.cpp | 130 ++++++++++++++-------- modules/imgproc/src/opencl/hough_lines.cl | 5 +- 2 files changed, 84 insertions(+), 51 deletions(-) diff --git a/modules/imgproc/src/hough.cpp b/modules/imgproc/src/hough.cpp index 29ca551fb..386c7e948 100644 --- a/modules/imgproc/src/hough.cpp +++ b/modules/imgproc/src/hough.cpp @@ -652,7 +652,70 @@ HoughLinesProbabilistic( Mat& image, } } } +static bool ocl_makePointsList(InputArray _src, OutputArray _pointsList, InputOutputArray _counters) +{ + UMat src = _src.getUMat(); + _pointsList.create(1, (int) src.total(), CV_32SC1); + UMat pointsList = _pointsList.getUMat(); + UMat counters = _counters.getUMat(); + ocl::Device dev = ocl::Device::getDefault(); + const int pixelsPerWI = 16; + int workgroup_size = min((int) dev.maxWorkGroupSize(), (src.cols + pixelsPerWI - 1)/pixelsPerWI); + ocl::Kernel pointListKernel("make_point_list", ocl::imgproc::hough_lines_oclsrc, + format("-D MAKE_POINTS_LIST -D GROUP_SIZE=%d -D LOCAL_SIZE=%d", workgroup_size, src.cols)); + if (pointListKernel.empty()) + return false; + + pointListKernel.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(pointsList), + ocl::KernelArg::PtrWriteOnly(counters)); + + size_t localThreads[2] = { workgroup_size, 1 }; + size_t globalThreads[2] = { workgroup_size, src.rows }; + + return pointListKernel.run(2, globalThreads, localThreads, false); +} + +static bool ocl_fillAccum(InputArray _pointsList, OutputArray _accum, int total_points, double rho, double theta, int numrho, int numangle) +{ + UMat pointsList = _pointsList.getUMat(); + _accum.create(numangle + 2, numrho + 2, CV_32SC1); + UMat accum = _accum.getUMat(); + ocl::Device dev = ocl::Device::getDefault(); + + float irho = (float) (1 / rho); + int workgroup_size = min((int) dev.maxWorkGroupSize(), total_points); + + ocl::Kernel fillAccumKernel; + size_t localThreads[2]; + size_t globalThreads[2]; + + int local_memory_needed = (numrho + 2)*sizeof(int); + if (local_memory_needed > dev.localMemSize()) + { + accum.setTo(Scalar::all(0)); + fillAccumKernel.create("fill_accum_global", ocl::imgproc::hough_lines_oclsrc, + format("-D FILL_ACCUM_GLOBAL")); + if (fillAccumKernel.empty()) + return false; + globalThreads[0] = workgroup_size; globalThreads[1] = numangle; + fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnly(accum), + total_points, irho, (float) theta, numrho, numangle); + return fillAccumKernel.run(2, globalThreads, NULL, false); + } + else + { + fillAccumKernel.create("fill_accum_local", ocl::imgproc::hough_lines_oclsrc, + format("-D FILL_ACCUM_LOCAL -D LOCAL_SIZE=%d -D BUFFER_SIZE=%d", workgroup_size, numrho + 2)); + if (fillAccumKernel.empty()) + return false; + localThreads[0] = workgroup_size; localThreads[1] = 1; + globalThreads[0] = workgroup_size; globalThreads[1] = numangle+2; + fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnly(accum), + total_points, irho, (float) theta, numrho, numangle); + return fillAccumKernel.run(2, globalThreads, localThreads, false); + } +} static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, double theta, int threshold, double min_theta, double max_theta) @@ -667,28 +730,13 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub } UMat src = _src.getUMat(); - - float irho = (float) (1 / rho); int numangle = cvRound((max_theta - min_theta) / theta); int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho); - ocl::Device dev = ocl::Device::getDefault(); - - // make list of nonzero points - const int pixelsPerWI = 8; - int workgroup_size = min((int) dev.maxWorkGroupSize(), (src.cols + pixelsPerWI - 1)/pixelsPerWI); - ocl::Kernel pointListKernel("make_point_list", ocl::imgproc::hough_lines_oclsrc, - format("-D MAKE_POINTS_LIST -D GROUP_SIZE=%d -D LOCAL_SIZE=%d", workgroup_size, src.cols)); - if (pointListKernel.empty()) - return false; - - UMat pointsList(1, (int) src.total(), CV_32SC1); + + UMat pointsList; UMat counters(1, 2, CV_32SC1, Scalar::all(0)); - pointListKernel.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(pointsList), - ocl::KernelArg::PtrWriteOnly(counters)); - size_t localThreads[2] = { workgroup_size, 1 }; - size_t globalThreads[2] = { workgroup_size, src.rows }; - if (!pointListKernel.run(2, globalThreads, localThreads, false)) + if (!ocl_makePointsList(src, pointsList, counters)) return false; int total_points = counters.getMat(ACCESS_READ).at(0, 0); @@ -698,34 +746,8 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub return true; } - // convert src image to hough space - UMat accum(numangle + 2, numrho + 2, CV_32SC1); - workgroup_size = min((int) dev.maxWorkGroupSize(), total_points); - ocl::Kernel fillAccumKernel; - size_t* fillAccumLT = NULL; - int local_memory_needed = (numrho + 2)*sizeof(int); - if (local_memory_needed > dev.localMemSize()) - { - accum.setTo(Scalar::all(0)); - fillAccumKernel.create("fill_accum_global", ocl::imgproc::hough_lines_oclsrc, - format("-D FILL_ACCUM_GLOBAL")); - globalThreads[0] = workgroup_size; globalThreads[1] = numangle; - } - else - { - fillAccumKernel.create("fill_accum_local", ocl::imgproc::hough_lines_oclsrc, - format("-D FILL_ACCUM_LOCAL -D LOCAL_SIZE=%d -D BUFFER_SIZE=%d", workgroup_size, numrho + 2)); - localThreads[0] = workgroup_size; localThreads[1] = 1; - globalThreads[0] = workgroup_size; globalThreads[1] = numangle+2; - fillAccumLT = localThreads; - } - if (fillAccumKernel.empty()) - return false; - - fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnly(accum), - total_points, irho, (float) theta, numrho, numangle); - - if (!fillAccumKernel.run(2, globalThreads, fillAccumLT, false)) + UMat accum; + if (!ocl_fillAccum(pointsList, accum, total_points, rho, theta, numrho, numangle)) return false; const int pixPerWI = 8; @@ -741,7 +763,7 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub getLinesKernel.args(ocl::KernelArg::ReadOnly(accum), ocl::KernelArg::WriteOnlyNoSize(lines), ocl::KernelArg::PtrWriteOnly(counters), linesMax, threshold, (float) rho, (float) theta); - globalThreads[0] = (numrho + pixPerWI - 1)/pixPerWI; globalThreads[1] = numangle; + size_t globalThreads[2] = { (numrho + pixPerWI - 1)/pixPerWI, numangle }; if (!getLinesKernel.run(2, globalThreads, NULL, false)) return false; @@ -753,13 +775,23 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub return true; } +static bool ocl_HoughLinesP(InputArray _src, OutputArray _lines, double rho, double theta, int threshold, + double minLineLength, double maxGap) +{ + CV_Assert(_src.type() == CV_8UC1); + + UMat src = _src.getUMat(); + + return false; +} + } void cv::HoughLines( InputArray _image, OutputArray _lines, double rho, double theta, int threshold, double srn, double stn, double min_theta, double max_theta ) { - CV_OCL_RUN(srn == 0 && stn == 0 && _lines.isUMat(), + CV_OCL_RUN(srn == 0 && stn == 0 && _image.isUMat() && _lines.isUMat(), ocl_HoughLines(_image, _lines, rho, theta, threshold, min_theta, max_theta)); Mat image = _image.getMat(); @@ -778,6 +810,8 @@ void cv::HoughLinesP(InputArray _image, OutputArray _lines, double rho, double theta, int threshold, double minLineLength, double maxGap ) { + CV_OCL_RUN(_image.isUMat() && _lines.isUMat(), ocl_HoughLinesP(_image, _lines, rho, theta, threshold, minLineLength, maxGap)); + Mat image = _image.getMat(); std::vector lines; HoughLinesProbabilistic(image, (float)rho, (float)theta, threshold, cvRound(minLineLength), cvRound(maxGap), lines, INT_MAX); diff --git a/modules/imgproc/src/opencl/hough_lines.cl b/modules/imgproc/src/opencl/hough_lines.cl index 25ebe4708..d402d3741 100644 --- a/modules/imgproc/src/opencl/hough_lines.cl +++ b/modules/imgproc/src/opencl/hough_lines.cl @@ -107,14 +107,13 @@ __kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, i __global const int * list = (__global const int*)(list_ptr + list_offset); const int shift = (numrho - 1) / 2; - for (int i = count_idx; i < total_points; i += LOCAL_SIZE) { const int point = list[i]; const int x = (point & 0xFFFF); - const int y = (point >> 16) & 0xFFFF; + const int y = point >> 16; - int r = convert_int_rte(x * cosVal + y * sinVal) + shift; + int r = convert_int_rte(mad(x, cosVal, y * sinVal)) + shift; atomic_inc(l_accum + r + 1); } From eaf5a163b155650d87d7b22d6831ef36e94a2649 Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Tue, 9 Sep 2014 12:50:12 +0400 Subject: [PATCH 8/9] Added HoughLinesP OCL implementation --- modules/imgproc/src/hough.cpp | 78 ++++++-- modules/imgproc/src/opencl/hough_lines.cl | 181 ++++++++++++++++++- modules/imgproc/test/ocl/test_houghlines.cpp | 86 ++++++++- 3 files changed, 314 insertions(+), 31 deletions(-) diff --git a/modules/imgproc/src/hough.cpp b/modules/imgproc/src/hough.cpp index 386c7e948..7631b3bf2 100644 --- a/modules/imgproc/src/hough.cpp +++ b/modules/imgproc/src/hough.cpp @@ -652,6 +652,9 @@ HoughLinesProbabilistic( Mat& image, } } } + +#ifdef HAVE_OPENCL + static bool ocl_makePointsList(InputArray _src, OutputArray _pointsList, InputOutputArray _counters) { UMat src = _src.getUMat(); @@ -660,16 +663,16 @@ static bool ocl_makePointsList(InputArray _src, OutputArray _pointsList, InputOu UMat counters = _counters.getUMat(); ocl::Device dev = ocl::Device::getDefault(); - const int pixelsPerWI = 16; - int workgroup_size = min((int) dev.maxWorkGroupSize(), (src.cols + pixelsPerWI - 1)/pixelsPerWI); - ocl::Kernel pointListKernel("make_point_list", ocl::imgproc::hough_lines_oclsrc, + const int pixPerWI = 16; + int workgroup_size = min((int) dev.maxWorkGroupSize(), (src.cols + pixPerWI - 1)/pixPerWI); + ocl::Kernel pointListKernel("make_point_list", ocl::imgproc::hough_lines_oclsrc, format("-D MAKE_POINTS_LIST -D GROUP_SIZE=%d -D LOCAL_SIZE=%d", workgroup_size, src.cols)); if (pointListKernel.empty()) return false; pointListKernel.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(pointsList), ocl::KernelArg::PtrWriteOnly(counters)); - + size_t localThreads[2] = { workgroup_size, 1 }; size_t globalThreads[2] = { workgroup_size, src.rows }; @@ -685,12 +688,12 @@ static bool ocl_fillAccum(InputArray _pointsList, OutputArray _accum, int total_ float irho = (float) (1 / rho); int workgroup_size = min((int) dev.maxWorkGroupSize(), total_points); - + ocl::Kernel fillAccumKernel; size_t localThreads[2]; size_t globalThreads[2]; - int local_memory_needed = (numrho + 2)*sizeof(int); + size_t local_memory_needed = (numrho + 2)*sizeof(int); if (local_memory_needed > dev.localMemSize()) { accum.setTo(Scalar::all(0)); @@ -717,7 +720,7 @@ static bool ocl_fillAccum(InputArray _pointsList, OutputArray _accum, int total_ } } -static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, double theta, int threshold, +static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, double theta, int threshold, double min_theta, double max_theta) { CV_Assert(_src.type() == CV_8UC1); @@ -732,7 +735,7 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub UMat src = _src.getUMat(); int numangle = cvRound((max_theta - min_theta) / theta); int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho); - + UMat pointsList; UMat counters(1, 2, CV_32SC1, Scalar::all(0)); @@ -766,7 +769,7 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub size_t globalThreads[2] = { (numrho + pixPerWI - 1)/pixPerWI, numangle }; if (!getLinesKernel.run(2, globalThreads, NULL, false)) return false; - + int total_lines = min(counters.getMat(ACCESS_READ).at(0, 1), linesMax); if (total_lines > 0) _lines.assign(lines.rowRange(Range(0, total_lines))); @@ -775,23 +778,67 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub return true; } -static bool ocl_HoughLinesP(InputArray _src, OutputArray _lines, double rho, double theta, int threshold, +static bool ocl_HoughLinesP(InputArray _src, OutputArray _lines, double rho, double theta, int threshold, double minLineLength, double maxGap) { CV_Assert(_src.type() == CV_8UC1); - - UMat src = _src.getUMat(); - return false; + UMat src = _src.getUMat(); + int numangle = cvRound(CV_PI / theta); + int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho); + + UMat pointsList; + UMat counters(1, 2, CV_32SC1, Scalar::all(0)); + + if (!ocl_makePointsList(src, pointsList, counters)) + return false; + + int total_points = counters.getMat(ACCESS_READ).at(0, 0); + if (total_points <= 0) + { + _lines.assign(UMat(0,0,CV_32SC4)); + return true; + } + + UMat accum; + if (!ocl_fillAccum(pointsList, accum, total_points, rho, theta, numrho, numangle)) + return false; + + ocl::Kernel getLinesKernel("get_lines", ocl::imgproc::hough_lines_oclsrc, + format("-D GET_LINES_PROBABOLISTIC")); + if (getLinesKernel.empty()) + return false; + + // TODO: investigate other strategies to choose linesMax + int linesMax = min(total_points*numangle/threshold, 4096); + UMat lines(linesMax, 1, CV_32SC4); + + getLinesKernel.args(ocl::KernelArg::ReadOnly(accum), ocl::KernelArg::ReadOnly(src), + ocl::KernelArg::WriteOnlyNoSize(lines), ocl::KernelArg::PtrWriteOnly(counters), + linesMax, threshold, (int) minLineLength, (int) maxGap, (float) rho, (float) theta); + + size_t globalThreads[2] = { numrho, numangle }; + if (!getLinesKernel.run(2, globalThreads, NULL, false)) + return false; + + int total_lines = min(counters.getMat(ACCESS_READ).at(0, 1), linesMax); + if (total_lines > 0) + _lines.assign(lines.rowRange(Range(0, total_lines))); + else + _lines.assign(UMat(0,0,CV_32SC4)); + + return true; } +#endif /* HAVE_OPENCL */ + } void cv::HoughLines( InputArray _image, OutputArray _lines, double rho, double theta, int threshold, double srn, double stn, double min_theta, double max_theta ) { - CV_OCL_RUN(srn == 0 && stn == 0 && _image.isUMat() && _lines.isUMat(), + CV_OCL_RUN(srn == 0 && stn == 0 && _image.isUMat() && _lines.isUMat(), ocl_HoughLines(_image, _lines, rho, theta, threshold, min_theta, max_theta)); Mat image = _image.getMat(); @@ -810,7 +857,8 @@ void cv::HoughLinesP(InputArray _image, OutputArray _lines, double rho, double theta, int threshold, double minLineLength, double maxGap ) { - CV_OCL_RUN(_image.isUMat() && _lines.isUMat(), ocl_HoughLinesP(_image, _lines, rho, theta, threshold, minLineLength, maxGap)); + CV_OCL_RUN(_image.isUMat() && _lines.isUMat(), + ocl_HoughLinesP(_image, _lines, rho, theta, threshold, minLineLength, maxGap)); Mat image = _image.getMat(); std::vector lines; diff --git a/modules/imgproc/src/opencl/hough_lines.cl b/modules/imgproc/src/opencl/hough_lines.cl index d402d3741..19c465d38 100644 --- a/modules/imgproc/src/opencl/hough_lines.cl +++ b/modules/imgproc/src/opencl/hough_lines.cl @@ -12,7 +12,7 @@ __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int { int x = get_local_id(0); int y = get_group_id(1); - + __local int l_index, l_offset; __local int l_points[LOCAL_SIZE]; __global const uchar * src = src_ptr + mad24(y, src_step, src_offset); @@ -37,12 +37,12 @@ __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int } barrier(CLK_LOCAL_MEM_FENCE); - + if (x == 0) l_offset = atomic_add(global_offset, l_index); barrier(CLK_LOCAL_MEM_FENCE); - + list += l_offset; for (int i=x; i < l_index; i+=GROUP_SIZE) { @@ -53,8 +53,8 @@ __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int #elif defined FILL_ACCUM_GLOBAL __kernel void fill_accum_global(__global const uchar * list_ptr, int list_step, int list_offset, - __global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols, - int total_points, float irho, float theta, int numrho, int numangle) + __global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols, + int total_points, float irho, float theta, int numrho, int numangle) { int theta_idx = get_global_id(1); int count_idx = get_global_id(0); @@ -90,7 +90,7 @@ __kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, i { int theta_idx = get_group_id(1); int count_idx = get_local_id(0); - + if (theta_idx > 0 && theta_idx < numangle + 1) { float cosVal; @@ -136,7 +136,7 @@ __kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, i #define ACCUM(ptr) *((__global int*)(ptr)) __kernel void get_lines(__global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols, - __global uchar * lines_ptr, int lines_step, int lines_offset, __global int* lines_index_ptr, + __global uchar * lines_ptr, int lines_step, int lines_offset, __global int* lines_index_ptr, int linesMax, int threshold, float rho, float theta) { int x0 = get_global_id(0); @@ -148,7 +148,7 @@ __kernel void get_lines(__global uchar * accum_ptr, int accum_step, int accum_of __global uchar* accum = accum_ptr + mad24(y+1, accum_step, mad24(x0+1, (int) sizeof(int), accum_offset)); __global float2* lines = (__global float2*)(lines_ptr + lines_offset); __global int* lines_index = lines_index_ptr + 1; - + for (int x=x0; x= threshold && + curVote > ACCUM(accum - accum_step - sizeof(int)) && + curVote > ACCUM(accum - accum_step) && + curVote > ACCUM(accum - accum_step + sizeof(int)) && + curVote > ACCUM(accum - sizeof(int)) && + curVote > ACCUM(accum + sizeof(int)) && + curVote > ACCUM(accum + accum_step - sizeof(int)) && + curVote > ACCUM(accum + accum_step) && + curVote > ACCUM(accum + accum_step + sizeof(int))) + { + const float radius = (x - (accum_cols - 2 - 1) * 0.5f) * rho; + const float angle = y * theta; + + float cosa; + float sina = sincos(angle, &cosa); + + float2 p0 = (float2)(cosa * radius, sina * radius); + float2 dir = (float2)(-sina, cosa); + + float2 pb[4] = { (float2)(-1, -1), (float2)(-1, -1), (float2)(-1, -1), (float2)(-1, -1) }; + float a; + + if (dir.x != 0) + { + a = -p0.x / dir.x; + pb[0].x = 0; + pb[0].y = p0.y + a * dir.y; + + a = (src_cols - 1 - p0.x) / dir.x; + pb[1].x = src_cols - 1; + pb[1].y = p0.y + a * dir.y; + } + if (dir.y != 0) + { + a = -p0.y / dir.y; + pb[2].x = p0.x + a * dir.x; + pb[2].y = 0; + + a = (src_rows - 1 - p0.y) / dir.y; + pb[3].x = p0.x + a * dir.x; + pb[3].y = src_rows - 1; + } + + if (pb[0].x == 0 && (pb[0].y >= 0 && pb[0].y < src_rows)) + { + p0 = pb[0]; + if (dir.x < 0) + dir = -dir; + } + else if (pb[1].x == src_cols - 1 && (pb[0].y >= 0 && pb[0].y < src_rows)) + { + p0 = pb[1]; + if (dir.x > 0) + dir = -dir; + } + else if (pb[2].y == 0 && (pb[2].x >= 0 && pb[2].x < src_cols)) + { + p0 = pb[2]; + if (dir.y < 0) + dir = -dir; + } + else if (pb[3].y == src_rows - 1 && (pb[3].x >= 0 && pb[3].x < src_cols)) + { + p0 = pb[3]; + if (dir.y > 0) + dir = -dir; + } + + float2 d; + if (fabs(dir.x) > fabs(dir.y)) + { + d.x = dir.x > 0 ? 1 : -1; + d.y = dir.y / fabs(dir.x); + } + else + { + d.x = dir.x / fabs(dir.y); + d.y = dir.y > 0 ? 1 : -1; + } + + float2 line_end[2]; + int gap; + bool inLine = false; + + float2 p1 = p0; + if (p1.x < 0 || p1.x >= src_cols || p1.y < 0 || p1.y >= src_rows) + return; + + for (;;) + { + if (*(src_ptr + mad24(p1.y, src_step, p1.x + src_offset))) + { + gap = 0; + + if (!inLine) + { + line_end[0] = p1; + line_end[1] = p1; + inLine = true; + } + else + { + line_end[1] = p1; + } + } + else if (inLine) + { + if (++gap > lineGap) + { + bool good_line = fabs(line_end[1].x - line_end[0].x) >= lineLength || + fabs(line_end[1].y - line_end[0].y) >= lineLength; + + if (good_line) + { + int index = atomic_inc(lines_index); + if (index < linesMax) + lines[index] = (int4)(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y); + } + + gap = 0; + inLine = false; + } + } + + p1 = p1 + d; + if (p1.x < 0 || p1.x >= src_cols || p1.y < 0 || p1.y >= src_rows) + { + if (inLine) + { + bool good_line = fabs(line_end[1].x - line_end[0].x) >= lineLength || + fabs(line_end[1].y - line_end[0].y) >= lineLength; + + if (good_line) + { + int index = atomic_inc(lines_index); + if (index < linesMax) + lines[index] = (int4)(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y); + } + + } + break; + } + } + + } +} + #endif \ No newline at end of file diff --git a/modules/imgproc/test/ocl/test_houghlines.cpp b/modules/imgproc/test/ocl/test_houghlines.cpp index 9c92c8ec6..aa251a776 100644 --- a/modules/imgproc/test/ocl/test_houghlines.cpp +++ b/modules/imgproc/test/ocl/test_houghlines.cpp @@ -15,17 +15,18 @@ namespace ocl { struct Vec2fComparator { - bool operator()(const cv::Vec2f& a, const cv::Vec2f b) const + bool operator()(const Vec2f& a, const Vec2f b) const { if(a[0] != b[0]) return a[0] < b[0]; else return a[1] < b[1]; } }; -PARAM_TEST_CASE(HoughLinesTestBase, double, double, int) +/////////////////////////////// HoughLines //////////////////////////////////// + +PARAM_TEST_CASE(HoughLines, double, double, int) { - double rhoStep; - double thetaStep; + double rhoStep, thetaStep; int threshold; Size src_size; @@ -50,7 +51,7 @@ PARAM_TEST_CASE(HoughLinesTestBase, double, double, int) line(src, Point(100, 0), Point(100, 200), Scalar::all(255), 1); line(src, Point(200, 0), Point(200, 200), Scalar::all(255), 1); line(src, Point(400, 0), Point(400, 200), Scalar::all(255), 1); - + src.copyTo(usrc); } @@ -65,7 +66,7 @@ PARAM_TEST_CASE(HoughLinesTestBase, double, double, int) virtual void Near(double eps = 0.) { EXPECT_EQ(dst.size(), udst.size()); - + if (dst.total() > 0) { Mat lines_cpu, lines_gpu; @@ -80,8 +81,6 @@ PARAM_TEST_CASE(HoughLinesTestBase, double, double, int) } }; -typedef HoughLinesTestBase HoughLines; - OCL_TEST_P(HoughLines, RealImage) { readRealTestData(); @@ -105,10 +104,81 @@ OCL_TEST_P(HoughLines, GeneratedImage) } } +/////////////////////////////// HoughLinesP /////////////////////////////////// + +PARAM_TEST_CASE(HoughLinesP, int, double, double) +{ + double rhoStep, thetaStep, minLineLength, maxGap; + int threshold; + + Size src_size; + Mat src, dst; + UMat usrc, udst; + + virtual void SetUp() + { + rhoStep = 1.0; + thetaStep = CV_PI / 180; + threshold = GET_PARAM(0); + minLineLength = GET_PARAM(1); + maxGap = GET_PARAM(2); + } + + virtual void readRealTestData() + { + Mat img = readImage("shared/pic5.png", IMREAD_GRAYSCALE); + Canny(img, src, 50, 200, 3); + + src.copyTo(usrc); + } + + virtual void Near(double eps = 0.) + { + Mat lines_gpu = udst.getMat(ACCESS_READ); + + if (dst.total() > 0 && lines_gpu.total() > 0) + { + Mat result_cpu(src.size(), CV_8UC1, Scalar::all(0)); + Mat result_gpu(src.size(), CV_8UC1, Scalar::all(0)); + + MatConstIterator_ it = dst.begin(), end = dst.end(); + for ( ; it != end; it++) + { + Vec4i p = *it; + line(result_cpu, Point(p[0], p[1]), Point(p[2], p[3]), Scalar(255)); + } + + it = lines_gpu.begin(), end = lines_gpu.end(); + for ( ; it != end; it++) + { + Vec4i p = *it; + line(result_gpu, Point(p[0], p[1]), Point(p[2], p[3]), Scalar(255)); + } + + EXPECT_MAT_SIMILAR(result_cpu, result_gpu, eps); + } + } +}; + + +OCL_TEST_P(HoughLinesP, RealImage) +{ + readRealTestData(); + + OCL_OFF(cv::HoughLinesP(src, dst, rhoStep, thetaStep, threshold, minLineLength, maxGap)); + OCL_ON(cv::HoughLinesP(usrc, udst, rhoStep, thetaStep, threshold, minLineLength, maxGap)); + + Near(0.2); +} + OCL_INSTANTIATE_TEST_CASE_P(Imgproc, HoughLines, Combine(Values(1, 0.5), // rhoStep Values(CV_PI / 180.0, CV_PI / 360.0), // thetaStep Values(80, 150))); // threshold +OCL_INSTANTIATE_TEST_CASE_P(Imgproc, HoughLinesP, Combine(Values(100, 150), // threshold + Values(50, 100), // minLineLength + Values(5, 10))); // maxLineGap + } } // namespace cvtest::ocl #endif // HAVE_OPENCL \ No newline at end of file From 66a8acfd3d0f2cc32845a8fca44311eeded1690f Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Tue, 7 Oct 2014 14:57:02 +0400 Subject: [PATCH 9/9] Optimization for HoughLinesP --- .../cudaimgproc/src/cuda/hough_segments.cu | 2 +- modules/imgproc/src/hough.cpp | 21 +++++--- modules/imgproc/src/opencl/hough_lines.cl | 52 ++++++++----------- modules/imgproc/test/ocl/test_houghlines.cpp | 2 +- 4 files changed, 37 insertions(+), 40 deletions(-) diff --git a/modules/cudaimgproc/src/cuda/hough_segments.cu b/modules/cudaimgproc/src/cuda/hough_segments.cu index ee50e00c6..ca433d30d 100644 --- a/modules/cudaimgproc/src/cuda/hough_segments.cu +++ b/modules/cudaimgproc/src/cuda/hough_segments.cu @@ -117,7 +117,7 @@ namespace cv { namespace cuda { namespace device if (dir.x < 0) dir = -dir; } - else if (pb[1].x == cols - 1 && (pb[0].y >= 0 && pb[0].y < rows)) + else if (pb[1].x == cols - 1 && (pb[1].y >= 0 && pb[1].y < rows)) { p0 = pb[1]; if (dir.x > 0) diff --git a/modules/imgproc/src/hough.cpp b/modules/imgproc/src/hough.cpp index 7631b3bf2..ead62d1d7 100644 --- a/modules/imgproc/src/hough.cpp +++ b/modules/imgproc/src/hough.cpp @@ -221,7 +221,7 @@ HoughLinesSDiv( const Mat& img, std::vector lst; CV_Assert( img.type() == CV_8UC1 ); - CV_Assert( linesMax > 0 && rho > 0 && theta > 0 ); + CV_Assert( linesMax > 0 ); threshold = MIN( threshold, 255 ); @@ -655,6 +655,8 @@ HoughLinesProbabilistic( Mat& image, #ifdef HAVE_OPENCL +#define OCL_MAX_LINES 4096 + static bool ocl_makePointsList(InputArray _src, OutputArray _pointsList, InputOutputArray _counters) { UMat src = _src.getUMat(); @@ -702,7 +704,7 @@ static bool ocl_fillAccum(InputArray _pointsList, OutputArray _accum, int total_ if (fillAccumKernel.empty()) return false; globalThreads[0] = workgroup_size; globalThreads[1] = numangle; - fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnly(accum), + fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnlyNoSize(accum), total_points, irho, (float) theta, numrho, numangle); return fillAccumKernel.run(2, globalThreads, NULL, false); } @@ -714,7 +716,7 @@ static bool ocl_fillAccum(InputArray _pointsList, OutputArray _accum, int total_ return false; localThreads[0] = workgroup_size; localThreads[1] = 1; globalThreads[0] = workgroup_size; globalThreads[1] = numangle+2; - fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnly(accum), + fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnlyNoSize(accum), total_points, irho, (float) theta, numrho, numangle); return fillAccumKernel.run(2, globalThreads, localThreads, false); } @@ -731,6 +733,9 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub if (min_theta < 0 || min_theta > max_theta ) { CV_Error( CV_StsBadArg, "min_theta must fall between 0 and max_theta" ); } + if (!(rho > 0 && theta > 0)) { + CV_Error( CV_StsBadArg, "rho and theta must be greater 0" ); + } UMat src = _src.getUMat(); int numangle = cvRound((max_theta - min_theta) / theta); @@ -759,8 +764,7 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub if (getLinesKernel.empty()) return false; - // TODO: investigate other strategies to choose linesMax - int linesMax = min(total_points*numangle/threshold, 4096); + int linesMax = threshold > 0 ? min(total_points*numangle/threshold, OCL_MAX_LINES) : OCL_MAX_LINES; UMat lines(linesMax, 1, CV_32FC2); getLinesKernel.args(ocl::KernelArg::ReadOnly(accum), ocl::KernelArg::WriteOnlyNoSize(lines), @@ -783,6 +787,10 @@ static bool ocl_HoughLinesP(InputArray _src, OutputArray _lines, double rho, dou { CV_Assert(_src.type() == CV_8UC1); + if (!(rho > 0 && theta > 0)) { + CV_Error( CV_StsBadArg, "rho and theta must be greater 0" ); + } + UMat src = _src.getUMat(); int numangle = cvRound(CV_PI / theta); int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho); @@ -809,8 +817,7 @@ static bool ocl_HoughLinesP(InputArray _src, OutputArray _lines, double rho, dou if (getLinesKernel.empty()) return false; - // TODO: investigate other strategies to choose linesMax - int linesMax = min(total_points*numangle/threshold, 4096); + int linesMax = threshold > 0 ? min(total_points*numangle/threshold, OCL_MAX_LINES) : OCL_MAX_LINES; UMat lines(linesMax, 1, CV_32SC4); getLinesKernel.args(ocl::KernelArg::ReadOnly(accum), ocl::KernelArg::ReadOnly(src), diff --git a/modules/imgproc/src/opencl/hough_lines.cl b/modules/imgproc/src/opencl/hough_lines.cl index 19c465d38..f318133ec 100644 --- a/modules/imgproc/src/opencl/hough_lines.cl +++ b/modules/imgproc/src/opencl/hough_lines.cl @@ -5,6 +5,8 @@ // Copyright (C) 2014, Itseez, Inc., all rights reserved. // Third party copyrights are property of their respective owners. +#define ACCUM(ptr) *((__global int*)(ptr)) + #ifdef MAKE_POINTS_LIST __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int src_offset, int src_rows, int src_cols, @@ -25,11 +27,13 @@ __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int if (y < src_rows) { + y <<= 16; + for (int i=x; i < src_cols; i+=GROUP_SIZE) { if (src[i]) { - int val = (y << 16) | i; + int val = y | i; int index = atomic_inc(&l_index); l_points[index] = val; } @@ -53,7 +57,7 @@ __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int #elif defined FILL_ACCUM_GLOBAL __kernel void fill_accum_global(__global const uchar * list_ptr, int list_step, int list_offset, - __global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols, + __global uchar * accum_ptr, int accum_step, int accum_offset, int total_points, float irho, float theta, int numrho, int numangle) { int theta_idx = get_global_id(1); @@ -76,7 +80,7 @@ __kernel void fill_accum_global(__global const uchar * list_ptr, int list_step, const int x = (val & 0xFFFF); const int y = (val >> 16) & 0xFFFF; - int r = convert_int_rte(x * cosVal + y * sinVal) + shift; + int r = convert_int_rte(mad(x, cosVal, y * sinVal)) + shift; atomic_inc(accum + r + 1); } } @@ -85,7 +89,7 @@ __kernel void fill_accum_global(__global const uchar * list_ptr, int list_step, #elif defined FILL_ACCUM_LOCAL __kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, int list_offset, - __global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols, + __global uchar * accum_ptr, int accum_step, int accum_offset, int total_points, float irho, float theta, int numrho, int numangle) { int theta_idx = get_group_id(1); @@ -133,15 +137,13 @@ __kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, i #elif defined GET_LINES -#define ACCUM(ptr) *((__global int*)(ptr)) - __kernel void get_lines(__global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols, __global uchar * lines_ptr, int lines_step, int lines_offset, __global int* lines_index_ptr, int linesMax, int threshold, float rho, float theta) { int x0 = get_global_id(0); int y = get_global_id(1); - int gl_size = get_global_size(0); + int glob_size = get_global_size(0); if (y < accum_rows-2) { @@ -149,7 +151,7 @@ __kernel void get_lines(__global uchar * accum_ptr, int accum_step, int accum_of __global float2* lines = (__global float2*)(lines_ptr + lines_offset); __global int* lines_index = lines_index_ptr + 1; - for (int x=x0; x= 0 && pb[0].y < src_rows)) + else if (pb[1].x == src_cols - 1 && (pb[1].y >= 0 && pb[1].y < src_rows)) { p0 = pb[1]; if (dir.x > 0) @@ -258,41 +259,30 @@ __kernel void get_lines(__global const uchar * accum_ptr, int accum_step, int ac dir = -dir; } - float2 d; - if (fabs(dir.x) > fabs(dir.y)) - { - d.x = dir.x > 0 ? 1 : -1; - d.y = dir.y / fabs(dir.x); - } - else - { - d.x = dir.x / fabs(dir.y); - d.y = dir.y > 0 ? 1 : -1; - } + dir /= max(fabs(dir.x), fabs(dir.y)); float2 line_end[2]; int gap; bool inLine = false; - float2 p1 = p0; - if (p1.x < 0 || p1.x >= src_cols || p1.y < 0 || p1.y >= src_rows) + if (p0.x < 0 || p0.x >= src_cols || p0.y < 0 || p0.y >= src_rows) return; for (;;) { - if (*(src_ptr + mad24(p1.y, src_step, p1.x + src_offset))) + if (*(src_ptr + mad24(p0.y, src_step, p0.x + src_offset))) { gap = 0; if (!inLine) { - line_end[0] = p1; - line_end[1] = p1; + line_end[0] = p0; + line_end[1] = p0; inLine = true; } else { - line_end[1] = p1; + line_end[1] = p0; } } else if (inLine) @@ -314,8 +304,8 @@ __kernel void get_lines(__global const uchar * accum_ptr, int accum_step, int ac } } - p1 = p1 + d; - if (p1.x < 0 || p1.x >= src_cols || p1.y < 0 || p1.y >= src_rows) + p0 = p0 + dir; + if (p0.x < 0 || p0.x >= src_cols || p0.y < 0 || p0.y >= src_rows) { if (inLine) { diff --git a/modules/imgproc/test/ocl/test_houghlines.cpp b/modules/imgproc/test/ocl/test_houghlines.cpp index aa251a776..1f9d802b9 100644 --- a/modules/imgproc/test/ocl/test_houghlines.cpp +++ b/modules/imgproc/test/ocl/test_houghlines.cpp @@ -168,7 +168,7 @@ OCL_TEST_P(HoughLinesP, RealImage) OCL_OFF(cv::HoughLinesP(src, dst, rhoStep, thetaStep, threshold, minLineLength, maxGap)); OCL_ON(cv::HoughLinesP(usrc, udst, rhoStep, thetaStep, threshold, minLineLength, maxGap)); - Near(0.2); + Near(0.25); } OCL_INSTANTIATE_TEST_CASE_P(Imgproc, HoughLines, Combine(Values(1, 0.5), // rhoStep