From caf91ac159821dc44e9f781d9609fe46fd53b24e Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 13 Dec 2012 17:18:25 +0400
Subject: [PATCH] new gpu::HoughLinesP implementation

---
 modules/gpu/include/opencv2/gpu/gpu.hpp |   2 +-
 modules/gpu/perf/perf_imgproc.cpp       |  14 +-
 modules/gpu/src/cuda/hough.cu           | 207 +++++++++++++++++-------
 modules/gpu/src/hough.cpp               |  39 +++--
 samples/gpu/houghlines.cpp              |  10 +-
 samples/gpu/softcascade.cpp             |   2 +-
 6 files changed, 190 insertions(+), 84 deletions(-)

diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp
index 866c3de7d..298bf918a 100644
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -850,7 +850,7 @@ CV_EXPORTS void HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines, O
 //! HoughLinesP
 
 //! finds line segments in the black-n-white image using probabalistic Hough transform
-CV_EXPORTS void HoughLinesP(const GpuMat& image, GpuMat& lines, CannyBuf& cannyBuf, int minLineLength, int maxLineGap, int maxLines = 4096);
+CV_EXPORTS void HoughLinesP(const GpuMat& image, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int minLineLength, int maxLineGap, int maxLines = 4096);
 
 //! HoughCircles
 
diff --git a/modules/gpu/perf/perf_imgproc.cpp b/modules/gpu/perf/perf_imgproc.cpp
index b8feab83a..ee0968442 100644
--- a/modules/gpu/perf/perf_imgproc.cpp
+++ b/modules/gpu/perf/perf_imgproc.cpp
@@ -1813,17 +1813,20 @@ PERF_TEST_P(Image, ImgProc_HoughLinesP, testing::Values("cv/shared/pic5.png", "s
 
     cv::Mat image = cv::imread(fileName, cv::IMREAD_GRAYSCALE);
 
+    cv::Mat mask;
+    cv::Canny(image, mask, 50, 100);
+
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_image(image);
+        cv::gpu::GpuMat d_mask(mask);
         cv::gpu::GpuMat d_lines;
-        cv::gpu::CannyBuf d_buf;
+        cv::gpu::HoughLinesBuf d_buf;
 
-        cv::gpu::HoughLinesP(d_image, d_lines, d_buf, minLineLenght, maxLineGap);
+        cv::gpu::HoughLinesP(d_mask, d_lines, d_buf, rho, theta, minLineLenght, maxLineGap);
 
         TEST_CYCLE()
         {
-            cv::gpu::HoughLinesP(d_image, d_lines, d_buf, minLineLenght, maxLineGap);
+            cv::gpu::HoughLinesP(d_mask, d_lines, d_buf, rho, theta, minLineLenght, maxLineGap);
         }
 
         cv::Mat h_lines(d_lines);
@@ -1834,9 +1837,6 @@ PERF_TEST_P(Image, ImgProc_HoughLinesP, testing::Values("cv/shared/pic5.png", "s
     }
     else
     {
-        cv::Mat mask;
-        cv::Canny(image, mask, 50, 100);
-
         std::vector<cv::Vec4i> lines;
         cv::HoughLinesP(mask, lines, rho, theta, threshold, minLineLenght, maxLineGap);
 
diff --git a/modules/gpu/src/cuda/hough.cu b/modules/gpu/src/cuda/hough.cu
index c4dfbcb66..695a47def 100644
--- a/modules/gpu/src/cuda/hough.cu
+++ b/modules/gpu/src/cuda/hough.cu
@@ -298,79 +298,168 @@ namespace cv { namespace gpu { namespace device
 
         texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_mask(false, cudaFilterModePoint, cudaAddressModeClamp);
 
-        __global__ void houghLinesProbabilistic(const PtrStepSzi Dx, const PtrStepi Dy,
+        __global__ void houghLinesProbabilistic(const PtrStepSzi accum,
                                                 int4* out, const int maxSize,
-                                                const int lineGap, const int lineLength)
+                                                const float rho, const float theta,
+                                                const int lineGap, const int lineLength,
+                                                const int rows, const int cols)
         {
-            const int SHIFT = 10;
+            const int r = blockIdx.x * blockDim.x + threadIdx.x;
+            const int n = blockIdx.y * blockDim.y + threadIdx.y;
 
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (x >= Dx.cols || y >= Dx.rows || tex2D(tex_mask, x, y) == 0)
+            if (r >= accum.cols - 2 || n >= accum.rows - 2)
                 return;
 
-            const int dx = Dx(y, x);
-            const int dy = Dy(y, x);
+            const int curVotes = accum(n + 1, r + 1);
 
-            if (dx == 0 && dy == 0)
-                return;
-
-            const int vx = dy;
-            const int vy = -dx;
-
-            const float mag = ::sqrtf(vx * vx + vy * vy);
-
-            const int x0 = x << SHIFT;
-            const int y0 = y << SHIFT;
-
-            int sx = __float2int_rn((vx << SHIFT) / mag);
-            int sy = __float2int_rn((vy << SHIFT) / mag);
-
-            int2 line_end[2] = {make_int2(x,y), make_int2(x,y)};
-
-            for (int k = 0; k < 2; ++k)
+            if (curVotes >= lineLength &&
+                curVotes > accum(n, r) &&
+                curVotes > accum(n, r + 1) &&
+                curVotes > accum(n, r + 2) &&
+                curVotes > accum(n + 1, r) &&
+                curVotes > accum(n + 1, r + 2) &&
+                curVotes > accum(n + 2, r) &&
+                curVotes > accum(n + 2, r + 1) &&
+                curVotes > accum(n + 2, r + 2))
             {
-                int gap = 0;
-                int x1 = x0 + sx;
-                int y1 = y0 + sy;
+                const float radius = (r - (accum.cols - 2 - 1) * 0.5f) * rho;
+                const float angle = n * theta;
 
-                for (;; x1 += sx, y1 += sy)
+                float cosa;
+                float sina;
+                sincosf(angle, &sina, &cosa);
+
+                float2 p0 = make_float2(cosa * radius, sina * radius);
+                float2 dir = make_float2(-sina, cosa);
+
+                float2 pb[4] = {make_float2(-1, -1), make_float2(-1, -1), make_float2(-1, -1), make_float2(-1, -1)};
+                float a;
+
+                if (dir.x != 0)
                 {
-                    const int x2 = x1 >> SHIFT;
-                    const int y2 = y1 >> SHIFT;
+                    a = -p0.x / dir.x;
+                    pb[0].x = 0;
+                    pb[0].y = p0.y + a * dir.y;
 
-                    if (x2 < 0 || x2 >= Dx.cols || y2 < 0 || y2 >= Dx.rows)
-                        break;
+                    a = (cols - 1 - p0.x) / dir.x;
+                    pb[1].x = cols - 1;
+                    pb[1].y = p0.y + a * dir.y;
+                }
+                if (dir.y != 0)
+                {
+                    a = -p0.y / dir.y;
+                    pb[2].x = p0.x + a * dir.x;
+                    pb[2].y = 0;
 
-                    if (tex2D(tex_mask, x2, y2))
-                    {
-                        gap = 0;
-                        line_end[k].x = x2;
-                        line_end[k].y = y2;
-                    }
-                    else if(++gap > lineGap)
-                        break;
+                    a = (rows - 1 - p0.y) / dir.y;
+                    pb[3].x = p0.x + a * dir.x;
+                    pb[3].y = rows - 1;
                 }
 
-                sx = -sx;
-                sy = -sy;
-            }
+                if (pb[0].x == 0 && (pb[0].y >= 0 && pb[0].y < rows))
+                {
+                    p0 = pb[0];
+                    if (dir.x < 0)
+                        dir = -dir;
+                }
+                else if (pb[1].x == cols - 1 && (pb[0].y >= 0 && pb[0].y < rows))
+                {
+                    p0 = pb[1];
+                    if (dir.x > 0)
+                        dir = -dir;
+                }
+                else if (pb[2].y == 0 && (pb[2].x >= 0 && pb[2].x < cols))
+                {
+                    p0 = pb[2];
+                    if (dir.y < 0)
+                        dir = -dir;
+                }
+                else if (pb[3].y == rows - 1 && (pb[3].x >= 0 && pb[3].x < cols))
+                {
+                    p0 = pb[3];
+                    if (dir.y > 0)
+                        dir = -dir;
+                }
 
-            const bool good_line = ::abs(line_end[1].x - line_end[0].x) >= lineLength ||
-                                   ::abs(line_end[1].y - line_end[0].y) >= lineLength;
+                float2 d;
+                if (::fabsf(dir.x) > ::fabsf(dir.y))
+                {
+                    d.x = dir.x > 0 ? 1 : -1;
+                    d.y = dir.y / ::fabsf(dir.x);
+                }
+                else
+                {
+                    d.x = dir.x / ::fabsf(dir.y);
+                    d.y = dir.y > 0 ? 1 : -1;
+                }
 
-            if (good_line)
-            {
-                const int ind = ::atomicAdd(&g_counter, 1);
-                if (ind < maxSize)
-                    out[ind] = make_int4(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y);
+                float2 line_end[2];
+                int gap;
+                bool inLine = false;
+
+                float2 p1 = p0;
+                if (p1.x < 0 || p1.x >= cols || p1.y < 0 || p1.y >= rows)
+                    return;
+
+                for (;;)
+                {
+                    if (tex2D(tex_mask, p1.x, p1.y))
+                    {
+                        gap = 0;
+
+                        if (!inLine)
+                        {
+                            line_end[0] = p1;
+                            line_end[1] = p1;
+                            inLine = true;
+                        }
+                        else
+                        {
+                            line_end[1] = p1;
+                        }
+                    }
+                    else if (inLine)
+                    {
+                        if (++gap > lineGap)
+                        {
+                            bool good_line = ::abs(line_end[1].x - line_end[0].x) >= lineLength ||
+                                             ::abs(line_end[1].y - line_end[0].y) >= lineLength;
+
+                            if (good_line)
+                            {
+                                const int ind = ::atomicAdd(&g_counter, 1);
+                                if (ind < maxSize)
+                                    out[ind] = make_int4(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y);
+                            }
+
+                            gap = 0;
+                            inLine = false;
+                        }
+                    }
+
+                    p1 = p1 + d;
+                    if (p1.x < 0 || p1.x >= cols || p1.y < 0 || p1.y >= rows)
+                    {
+                        if (inLine)
+                        {
+                            bool good_line = ::abs(line_end[1].x - line_end[0].x) >= lineLength ||
+                                             ::abs(line_end[1].y - line_end[0].y) >= lineLength;
+
+                            if (good_line)
+                            {
+                                const int ind = ::atomicAdd(&g_counter, 1);
+                                if (ind < maxSize)
+                                    out[ind] = make_int4(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y);
+                            }
+
+                        }
+                        break;
+                    }
+                }
             }
         }
 
-        int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi Dx, PtrStepSzi Dy,
-                                        int4* out, int maxSize,
-                                        int lineGap, int lineLength)
+        int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength)
         {
             void* counterPtr;
             cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
@@ -378,11 +467,15 @@ namespace cv { namespace gpu { namespace device
             cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
 
             const dim3 block(32, 8);
-            const dim3 grid(divUp(mask.cols, block.x), divUp(mask.rows, block.y));
+            const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
 
             bindTexture(&tex_mask, mask);
 
-            houghLinesProbabilistic<<<grid, block>>>(Dx, Dy, out, maxSize, lineGap, lineLength);
+            houghLinesProbabilistic<<<grid, block>>>(accum,
+                                                     out, maxSize,
+                                                     rho, theta,
+                                                     lineGap, lineLength,
+                                                     mask.rows, mask.cols);
             cudaSafeCall( cudaGetLastError() );
 
             cudaSafeCall( cudaDeviceSynchronize() );
diff --git a/modules/gpu/src/hough.cpp b/modules/gpu/src/hough.cpp
index daeb62536..b1751076c 100644
--- a/modules/gpu/src/hough.cpp
+++ b/modules/gpu/src/hough.cpp
@@ -52,7 +52,7 @@ void cv::gpu::HoughLines(const GpuMat&, GpuMat&, float, float, int, bool, int) {
 void cv::gpu::HoughLines(const GpuMat&, GpuMat&, HoughLinesBuf&, float, float, int, bool, int) { throw_nogpu(); }
 void cv::gpu::HoughLinesDownload(const GpuMat&, OutputArray, OutputArray) { throw_nogpu(); }
 
-void cv::gpu::HoughLinesP(const GpuMat&, GpuMat&, CannyBuf&, int, int, int) { throw_nogpu(); }
+void cv::gpu::HoughLinesP(const GpuMat&, GpuMat&, HoughLinesBuf&, float, float, int, int, int) { throw_nogpu(); }
 
 void cv::gpu::HoughCircles(const GpuMat&, GpuMat&, int, float, float, int, int, int, int, int) { throw_nogpu(); }
 void cv::gpu::HoughCircles(const GpuMat&, GpuMat&, HoughCirclesBuf&, int, float, float, int, int, int, int, int) { throw_nogpu(); }
@@ -164,28 +164,41 @@ namespace cv { namespace gpu { namespace device
 {
     namespace hough
     {
-        int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi Dx, PtrStepSzi Dy,
-                                        int4* out, int maxSize,
-                                        int lineGap, int lineLength);
+        int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength);
     }
 }}}
 
-void cv::gpu::HoughLinesP(const GpuMat& image, GpuMat& lines, CannyBuf& cannyBuf, int minLineLength, int maxLineGap, int maxLines)
+void cv::gpu::HoughLinesP(const GpuMat& src, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int minLineLength, int maxLineGap, int maxLines)
 {
     using namespace cv::gpu::device::hough;
 
-    CV_Assert( image.type() == CV_8UC1 );
-    CV_Assert( image.cols < std::numeric_limits<unsigned short>::max() );
-    CV_Assert( image.rows < std::numeric_limits<unsigned short>::max() );
+    CV_Assert( src.type() == CV_8UC1 );
+    CV_Assert( src.cols < std::numeric_limits<unsigned short>::max() );
+    CV_Assert( src.rows < std::numeric_limits<unsigned short>::max() );
 
-    GpuMat mask;
-    Canny(image, cannyBuf, mask, 50, 100);
+    ensureSizeIsEnough(1, src.size().area(), CV_32SC1, buf.list);
+    unsigned int* srcPoints = buf.list.ptr<unsigned int>();
+
+    const int pointsCount = buildPointList_gpu(src, srcPoints);
+    if (pointsCount == 0)
+    {
+        lines.release();
+        return;
+    }
+
+    const int numangle = cvRound(CV_PI / theta);
+    const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho);
+    CV_Assert( numangle > 0 && numrho > 0 );
+
+    ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, buf.accum);
+    buf.accum.setTo(Scalar::all(0));
+
+    DeviceInfo devInfo;
+    linesAccum_gpu(srcPoints, pointsCount, buf.accum, rho, theta, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20));
 
     ensureSizeIsEnough(1, maxLines, CV_32SC4, lines);
 
-    int linesCount = houghLinesProbabilistic_gpu(mask, cannyBuf.dx, cannyBuf.dy,
-                                                 lines.ptr<int4>(), maxLines,
-                                                 maxLineGap, minLineLength);
+    int linesCount = houghLinesProbabilistic_gpu(src, buf.accum, lines.ptr<int4>(), maxLines, rho, theta, maxLineGap, minLineLength);
 
     if (linesCount > 0)
         lines.cols = linesCount;
diff --git a/samples/gpu/houghlines.cpp b/samples/gpu/houghlines.cpp
index e9bb96915..0f1932672 100644
--- a/samples/gpu/houghlines.cpp
+++ b/samples/gpu/houghlines.cpp
@@ -30,14 +30,14 @@ int main(int argc, const char* argv[])
     }
 
     Mat mask;
-    Canny(src, mask, 50, 200, 3);
+    Canny(src, mask, 100, 200, 3);
 
     Mat dst_cpu;
     cvtColor(mask, dst_cpu, CV_GRAY2BGR);
     Mat dst_gpu = dst_cpu.clone();
 
     vector<Vec4i> lines_cpu;
-    HoughLinesP(mask, lines_cpu, 1, CV_PI / 180, 50, 50, 5);
+    HoughLinesP(mask, lines_cpu, 1, CV_PI / 180, 50, 60, 5);
     cout << lines_cpu.size() << endl;
 
     for (size_t i = 0; i < lines_cpu.size(); ++i)
@@ -46,10 +46,10 @@ int main(int argc, const char* argv[])
         line(dst_cpu, Point(l[0], l[1]), Point(l[2], l[3]), Scalar(0, 0, 255), 3, CV_AA);
     }
 
-    GpuMat d_src(src);
+    GpuMat d_src(mask);
     GpuMat d_lines;
-    CannyBuf d_buf;
-    gpu::HoughLinesP(d_src, d_lines, d_buf, 50, 5);
+    HoughLinesBuf d_buf;
+    gpu::HoughLinesP(d_src, d_lines, d_buf, 1, CV_PI / 180, 50, 5);
     vector<Vec4i> lines_gpu;
     if (!d_lines.empty())
     {
diff --git a/samples/gpu/softcascade.cpp b/samples/gpu/softcascade.cpp
index fe518504b..66f82d50b 100644
--- a/samples/gpu/softcascade.cpp
+++ b/samples/gpu/softcascade.cpp
@@ -74,7 +74,7 @@ int main(int argc, char** argv)
             return 0;
         }
 
-        cv::gpu::GpuMat dframe(frame), roi(frame.rows, frame.cols, CV_8UC1), trois;
+        cv::gpu::GpuMat dframe(frame), roi(frame.rows, frame.cols, CV_8UC1);
         roi.setTo(cv::Scalar::all(1));
         cascade.detect(dframe, roi, objects);