diff --git a/modules/cudaimgproc/src/generalized_hough.cpp b/modules/cudaimgproc/src/generalized_hough.cpp index d68b76e62..9810bed3a 100644 --- a/modules/cudaimgproc/src/generalized_hough.cpp +++ b/modules/cudaimgproc/src/generalized_hough.cpp @@ -239,8 +239,9 @@ namespace void GeneralizedHoughBase::detectImpl(InputArray image, OutputArray positions, OutputArray votes) { #ifndef HAVE_OPENCV_CUDAFILTERS - (void) templ; - (void) templCenter; + (void) image; + (void) positions; + (void) votes; throw_no_cuda(); #else calcEdges(image, imageEdges_, imageDx_, imageDy_); diff --git a/modules/imgproc/src/hough.cpp b/modules/imgproc/src/hough.cpp index 17dbcc662..d0440d231 100644 --- a/modules/imgproc/src/hough.cpp +++ b/modules/imgproc/src/hough.cpp @@ -668,9 +668,10 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub UMat src = _src.getUMat(); - float irho = 1 / rho; + float irho = (float) (1 / rho); int numangle = cvRound((max_theta - min_theta) / theta); int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho); + ocl::Device dev = ocl::Device::getDefault(); // make list of nonzero points const int pixelsPerWI = 4; @@ -680,7 +681,7 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub if (pointListKernel.empty()) return false; - UMat pointsList(1, src.total(), CV_32SC1); + UMat pointsList(1, (int) src.total(), CV_32SC1); UMat total(1, 1, CV_32SC1, Scalar::all(0)); pointListKernel.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(pointsList), ocl::KernelArg::PtrWriteOnly(total)); @@ -692,37 +693,66 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub int total_points = total.getMat(ACCESS_READ).at(0, 0); if (total_points <= 0) - return false; + { + _lines.assign(UMat(0,0,CV_32FC2)); + return true; + } // convert src to hough space - group_size = (total_points + pixelsPerWI - 1)/pixelsPerWI; - ocl::Kernel fillAccumKernel("fill_accum", ocl::imgproc::hough_lines_oclsrc, - format("-D FILL_ACCUM -D GROUP_SIZE=%d", group_size)); + group_size = min((int) dev.maxWorkGroupSize(), total_points); + int local_memory_needed = (numrho + 2)*sizeof(int); + ocl::Kernel fillAccumKernel; + globalThreads[0] = group_size; globalThreads[1] = numangle; + size_t* fillAccumLT = NULL; + + UMat accum(numangle + 2, numrho + 2, CV_32SC1); + + if (local_memory_needed > dev.localMemSize()) + { + fillAccumKernel.create("fill_accum_global", ocl::imgproc::hough_lines_oclsrc, + format("-D FILL_ACCUM_GLOBAL")); + accum.setTo(Scalar::all(0)); + } + else + { + fillAccumKernel.create("fill_accum_local", ocl::imgproc::hough_lines_oclsrc, + format("-D FILL_ACCUM_LOCAL -D LOCAL_SIZE=%d -D BUFFER_SIZE=%d", group_size, numrho + 2)); + localThreads[0] = group_size; localThreads[1] = 1; + fillAccumLT = localThreads; + } if (fillAccumKernel.empty()) return false; - UMat accum(numangle + 2, numrho + 2, CV_32SC1, Scalar::all(0)); + int linesMax = min(total_points*numangle/threshold, 4096); + UMat lines(linesMax, 1, CV_32FC2); + UMat lines_count(1, 1, CV_32SC1, Scalar::all(0)); + fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnly(accum), total_points, irho, (float) theta, numrho, numangle); - globalThreads[0] = group_size; globalThreads[1] = numangle; - if (!fillAccumKernel.run(2, globalThreads, NULL, false)) + + if (!fillAccumKernel.run(2, globalThreads, fillAccumLT, false)) return false; - printf("GPU: \n"); - int sum = 0; - Mat ac = accum.getMat(ACCESS_READ); - for (int i=0; i<8; i++) - { - for (int j=0; j<8; j++) - { - sum += ac.at(i, j); - printf("%d ", ac.at(i, j)); - } - printf("\n"); - } - printf("sum = %d\n", sum); - return false; + ocl::Kernel getLinesKernel("get_lines", ocl::imgproc::hough_lines_oclsrc, + format("-D GET_LINES")); + if (getLinesKernel.empty()) + return false; + + globalThreads[0] = numrho; globalThreads[1] = numangle; + getLinesKernel.args(ocl::KernelArg::ReadOnly(accum), ocl::KernelArg::WriteOnlyNoSize(lines), + ocl::KernelArg::PtrWriteOnly(lines_count), linesMax, threshold, (float) rho, (float) theta); + + if (!getLinesKernel.run(2, globalThreads, NULL, false)) + return false; + + + int total_lines = min(lines_count.getMat(ACCESS_READ).at(0, 0), linesMax); + if (total_lines > 0) + _lines.assign(lines.rowRange(Range(0, total_lines))); + else + _lines.assign(UMat(0,0,CV_32FC2)); + return true; } } diff --git a/modules/imgproc/src/opencl/hough_lines.cl b/modules/imgproc/src/opencl/hough_lines.cl index 756416309..2b8311567 100644 --- a/modules/imgproc/src/opencl/hough_lines.cl +++ b/modules/imgproc/src/opencl/hough_lines.cl @@ -50,9 +50,9 @@ __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int } } -#elif defined FILL_ACCUM +#elif defined FILL_ACCUM_GLOBAL -__kernel void fill_accum(__global const uchar * list_ptr, int list_step, int list_offset, +__kernel void fill_accum_global(__global const uchar * list_ptr, int list_step, int list_offset, __global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols, int count, float irho, float theta, int numrho, int numangle) { @@ -82,5 +82,82 @@ __kernel void fill_accum(__global const uchar * list_ptr, int list_step, int lis } } +#elif defined FILL_ACCUM_LOCAL + +__kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, int list_offset, + __global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols, + int count, float irho, float theta, int numrho, int numangle) +{ + int theta_idx = get_global_id(1); + int count_idx = get_local_id(0); + + float cosVal; + float sinVal = sincos(theta * ((float)theta_idx), &cosVal); + sinVal *= irho; + cosVal *= irho; + + __local int l_accum[BUFFER_SIZE]; + for (int i=count_idx; i> 16) & 0xFFFF; + + int r = convert_int_rte(x * cosVal + y * sinVal) + shift; + atomic_inc(l_accum + r + 1); + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + + __global int* accum = (__global int*)(accum_ptr + mad24(theta_idx + 1, accum_step, accum_offset)); + for (int i=count_idx; i threshold && curVote > ACCUM(accum - sizeof(int)) && curVote >= ACCUM(accum + sizeof(int)) && + curVote > ACCUM(accum - accum_step) && curVote >= ACCUM(accum + accum_step)) + { + int index = atomic_inc(lines_index); + + if (index < linesMax) + { + float radius = (x - (accum_cols - 3) * 0.5f) * rho; + float angle = y * theta; + + lines[index] = (float2)(radius, angle); + } + } + } +} + #endif diff --git a/modules/imgproc/test/ocl/test_houghlines.cpp b/modules/imgproc/test/ocl/test_houghlines.cpp index 00577b72b..2d78c0403 100644 --- a/modules/imgproc/test/ocl/test_houghlines.cpp +++ b/modules/imgproc/test/ocl/test_houghlines.cpp @@ -13,49 +13,66 @@ namespace cvtest { namespace ocl { -PARAM_TEST_CASE(HoughLinesTestBase, bool) +struct Vec2fComparator +{ + bool operator()(const cv::Vec2f& a, const cv::Vec2f b) const + { + if(a[0] != b[0]) return a[0] < b[0]; + else return a[1] < b[1]; + } +}; + +PARAM_TEST_CASE(HoughLinesTestBase, double, double, int) { double rhoStep; double thetaStep; int threshold; - bool useRoi; + Size src_size; Mat src, dst; UMat usrc, udst; virtual void SetUp() { - rhoStep = 10; - thetaStep = 0.5; - threshold = 80; - useRoi = false; + rhoStep = GET_PARAM(0); + thetaStep = GET_PARAM(1); + threshold = GET_PARAM(2); } virtual void generateTestData() { - //Mat image = readImage("shared/pic1.png", IMREAD_GRAYSCALE); + src_size = randomSize(500, 1000); + src.create(src_size, CV_8UC1); + src.setTo(Scalar::all(0)); + line(src, Point(0, 100), Point(100, 100), Scalar::all(255), 1); + line(src, Point(0, 200), Point(100, 200), Scalar::all(255), 1); + line(src, Point(0, 400), Point(100, 400), Scalar::all(255), 1); + line(src, Point(100, 0), Point(100, 200), Scalar::all(255), 1); + line(src, Point(200, 0), Point(200, 200), Scalar::all(255), 1); + line(src, Point(400, 0), Point(400, 200), Scalar::all(255), 1); - Mat image = randomMat(Size(20, 10), CV_8UC1, 0, 255, false); - - cv::threshold(image, src, 127, 255, THRESH_BINARY); - //Canny(image, src, 100, 150, 3); src.copyTo(usrc); } }; typedef HoughLinesTestBase HoughLines; -OCL_TEST_P(HoughLines, RealImage) +OCL_TEST_P(HoughLines, GeneratedImage) { - generateTestData(); + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(); - //std::cout << src << std::endl; + OCL_OFF(cv::HoughLines(src, dst, rhoStep, thetaStep, threshold)); + OCL_ON(cv::HoughLines(usrc, udst, rhoStep, thetaStep, threshold)); - OCL_OFF(cv::HoughLines(src, dst, rhoStep, thetaStep, threshold, 0, 0)); - OCL_ON(cv::HoughLines(usrc, udst, rhoStep, thetaStep, threshold, 0, 0)); + //Near(1e-5); + } } -OCL_INSTANTIATE_TEST_CASE_P(Imgproc, HoughLines, Values(true, false)); +OCL_INSTANTIATE_TEST_CASE_P(Imgproc, HoughLines, Combine(Values(1, 0.5), // rhoStep + Values(CV_PI / 180.0, CV_PI / 360.0), // thetaStep + Values(80, 150))); // threshold } } // namespace cvtest::ocl