From 767ac9aa108b29c09867959503f939295e78267f Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov
Date: Mon, 8 Aug 2011 08:53:55 +0000
Subject: [PATCH] added gpu::Canny function

---
 modules/gpu/include/opencv2/gpu/gpu.hpp |  27 +-
 modules/gpu/src/cuda/canny.cu           | 489 ++++++++++++++++++++++++
 modules/gpu/src/imgproc_gpu.cpp         | 152 +++++++-
 modules/gpu/test/test_imgproc.cpp       |  67 ++++
 samples/gpu/performance/tests.cpp       |  22 ++
 5 files changed, 754 insertions(+), 3 deletions(-)
 create mode 100644 modules/gpu/src/cuda/canny.cu

diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp
index 9a64893d3..c7330666d 100644
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -976,7 +976,32 @@ namespace cv
         //! performs linear blending of two images
         //! to avoid accuracy errors sum of weigths shouldn't be very close to zero
         CV_EXPORTS void blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
-            GpuMat& result, Stream& stream = Stream::Null());
+            GpuMat& result, Stream& stream = Stream::Null());
+
+
+        struct CV_EXPORTS CannyBuf;
+
+        CV_EXPORTS void Canny(const GpuMat& image, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
+        CV_EXPORTS void Canny(const GpuMat& image, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
+        CV_EXPORTS void Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);
+        CV_EXPORTS void Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);
+
+        struct CV_EXPORTS CannyBuf
+        {
+            CannyBuf() {}
+            explicit CannyBuf(const Size& image_size, int apperture_size = 3) {create(image_size, apperture_size);}
+            CannyBuf(const GpuMat& dx_, const GpuMat& dy_);
+
+            void create(const Size& image_size, int apperture_size = 3);
+
+            void release();
+
+            GpuMat dx, dy;
+            GpuMat dx_buf, dy_buf;
+            GpuMat edgeBuf;
+            GpuMat trackBuf1, trackBuf2;
+            Ptr<FilterEngine_GPU> filterDX, filterDY;
+        };
 
         ////////////////////////////// Matrix reductions //////////////////////////////
 
diff --git a/modules/gpu/src/cuda/canny.cu b/modules/gpu/src/cuda/canny.cu
new file mode 100644
index 000000000..5a7a868fa
--- /dev/null
+++ b/modules/gpu/src/cuda/canny.cu
@@ -0,0 +1,489 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "internal_shared.hpp"
+#include "opencv2/gpu/device/utility.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::device;
+
+namespace cv { namespace gpu { namespace canny
+{
+    __global__ void calcSobelRowPass(PtrStep src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols)
+    {
+        __shared__ int smem[16][18];
+
+        const int j = blockIdx.x * blockDim.x + threadIdx.x;
+        const int i = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if (i < rows)
+        {
+            smem[threadIdx.y][threadIdx.x + 1] = src.ptr(i)[j];
+            if (threadIdx.x == 0)
+            {
+                smem[threadIdx.y][0] = src.ptr(i)[max(j - 1, 0)];
+                smem[threadIdx.y][17] = src.ptr(i)[min(j + 16, cols - 1)];
+            }
+            __syncthreads();
+
+            if (j < cols)
+            {
+                dx_buf.ptr(i)[j] = -smem[threadIdx.y][threadIdx.x] + smem[threadIdx.y][threadIdx.x + 2];
+                dy_buf.ptr(i)[j] = smem[threadIdx.y][threadIdx.x] + 2 * smem[threadIdx.y][threadIdx.x + 1] + smem[threadIdx.y][threadIdx.x + 2];
+            }
+        }
+    }
+
+    void calcSobelRowPass_gpu(PtrStep src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols)
+    {
+        dim3 block(16, 16, 1);
+        dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
+
+        calcSobelRowPass<<<grid, block>>>(src, dx_buf, dy_buf, rows, cols);
+        cudaSafeCall( cudaGetLastError() );
+
+        cudaSafeCall(cudaThreadSynchronize());
+    }
+
+    struct L1
+    {
+        static __device__ __forceinline__ float calc(int x, int y)
+        {
+            return abs(x) + abs(y);
+        }
+    };
+    struct L2
+    {
+        static __device__ __forceinline__ float calc(int x, int y)
+        {
+            return sqrtf(x * x + y * y);
+        }
+    };
+
+    template <typename Norm> __global__ void calcMagnitude(PtrStepi dx_buf, PtrStepi dy_buf, PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols)
+    {
+        __shared__ int sdx[18][16];
+        __shared__ int sdy[18][16];
+
+        const int j = blockIdx.x * blockDim.x + threadIdx.x;
+        const int i = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if (j < cols)
+        {
+            sdx[threadIdx.y + 1][threadIdx.x] = dx_buf.ptr(i)[j];
+            sdy[threadIdx.y + 1][threadIdx.x] = dy_buf.ptr(i)[j];
+            if (threadIdx.y == 0)
+            {
+                sdx[0][threadIdx.x] = dx_buf.ptr(max(i - 1, 0))[j];
+                sdx[17][threadIdx.x] = dx_buf.ptr(min(i + 16, rows - 1))[j];
+
+                sdy[0][threadIdx.x] = dy_buf.ptr(max(i - 1, 0))[j];
+                sdy[17][threadIdx.x] = dy_buf.ptr(min(i + 16, rows - 1))[j];
+            }
+            __syncthreads();
+
+            if (i < rows)
+            {
+                int x = sdx[threadIdx.y][threadIdx.x] + 2 * sdx[threadIdx.y + 1][threadIdx.x] + sdx[threadIdx.y + 2][threadIdx.x];
+                int y = -sdy[threadIdx.y][threadIdx.x] + sdy[threadIdx.y + 2][threadIdx.x];
+
+                dx.ptr(i)[j] = x;
+                dy.ptr(i)[j] = y;
+
+                mag.ptr(i + 1)[j + 1] = Norm::calc(x, y);
+            }
+        }
+    }
+
+    void calcMagnitude_gpu(PtrStepi dx_buf, PtrStepi dy_buf, PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad)
+    {
+        dim3 block(16, 16, 1);
+        dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
+
+        if (L2Grad)
+            calcMagnitude<L2><<<grid, block>>>(dx_buf, dy_buf, dx, dy, mag, rows, cols);
+        else
+            calcMagnitude<L1><<<grid, block>>>(dx_buf, dy_buf, dx, dy, mag, rows, cols);
+
+        cudaSafeCall( cudaGetLastError() );
+
+        cudaSafeCall(cudaThreadSynchronize());
+    }
+
+    template <typename Norm> __global__ void calcMagnitude(PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols)
+    {
+        const int j = blockIdx.x * blockDim.x + threadIdx.x;
+        const int i = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if (i < rows && j < cols)
+            mag.ptr(i + 1)[j + 1] = Norm::calc(dx.ptr(i)[j], dy.ptr(i)[j]);
+    }
+
+    void calcMagnitude_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad)
+    {
+        dim3 block(16, 16, 1);
+        dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
+
+        if (L2Grad)
+            calcMagnitude<L2><<<grid, block>>>(dx, dy, mag, rows, cols);
+        else
+            calcMagnitude<L1><<<grid, block>>>(dx, dy, mag, rows, cols);
+
+        cudaSafeCall( cudaGetLastError() );
+
+        cudaSafeCall(cudaThreadSynchronize());
+    }
+
+//////////////////////////////////////////////////////////////////////////////////////////
+
+#define CANNY_SHIFT 15
+#define TG22 (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5)
+
+    __global__ void calcMap(PtrStepi dx, PtrStepi dy, PtrStepf mag, PtrStepi map, int rows, int cols, float low_thresh, float high_thresh)
+    {
+        __shared__ float smem[18][18];
+
+        const int j = blockIdx.x * 16 + threadIdx.x;
+        const int i = blockIdx.y * 16 + threadIdx.y;
+
+        const int tid = threadIdx.y * 16 + threadIdx.x;
+        const int lx = tid % 18;
+        const int ly = tid / 18;
+
+        if (ly < 14)
+            smem[ly][lx] = mag.ptr(blockIdx.y * 16 + ly)[blockIdx.x * 16 + lx];
+
+        if (ly < 4 && blockIdx.y * 16 + ly + 14 <= rows && blockIdx.x * 16 + lx <= cols)
+            smem[ly + 14][lx] = mag.ptr(blockIdx.y * 16 + ly + 14)[blockIdx.x * 16 + lx];
+
+        __syncthreads();
+
+        if (i < rows && j < cols)
+        {
+            int x = dx.ptr(i)[j];
+            int y = dy.ptr(i)[j];
+            const int s = (x ^ y) < 0 ? -1 : 1;
+            const float m = smem[threadIdx.y + 1][threadIdx.x + 1];
+
+            x = abs(x);
+            y = abs(y);
+
+            // 0 - the pixel can not belong to an edge
+            // 1 - the pixel might belong to an edge
+            // 2 - the pixel does belong to an edge
+            int edge_type = 0;
+
+            if (m > low_thresh)
+            {
+                const int tg22x = x * TG22;
+                const int tg67x = tg22x + ((x + x) << CANNY_SHIFT);
+
+                y <<= CANNY_SHIFT;
+
+                if (y < tg22x)
+                {
+                    if (m > smem[threadIdx.y + 1][threadIdx.x] && m >= smem[threadIdx.y + 1][threadIdx.x + 2])
+                        edge_type = 1 + (int)(m > high_thresh);
+                }
+                else if( y > tg67x )
+                {
+                    if (m > smem[threadIdx.y][threadIdx.x + 1] && m >= smem[threadIdx.y + 2][threadIdx.x + 1])
+                        edge_type = 1 + (int)(m > high_thresh);
+                }
+                else
+                {
+                    if (m > smem[threadIdx.y][threadIdx.x + 1 - s] && m > smem[threadIdx.y + 2][threadIdx.x + 1 + s])
+                        edge_type = 1 + (int)(m > high_thresh);
+                }
+            }
+
+            map.ptr(i + 1)[j + 1] = edge_type;
+        }
+    }
+
+#undef CANNY_SHIFT
+#undef TG22
+
+    void calcMap_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, PtrStepi map, int rows, int cols, float low_thresh, float high_thresh)
+    {
+        dim3 block(16, 16, 1);
+        dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
+
+        calcMap<<<grid, block>>>(dx, dy, mag, map, rows, cols, low_thresh, high_thresh);
+        cudaSafeCall( cudaGetLastError() );
+
+        cudaSafeCall(cudaThreadSynchronize());
+    }
+
+//////////////////////////////////////////////////////////////////////////////////////////
+
+    __device__ unsigned int counter = 0;
+
+    __global__ void edgesHysteresisLocal(PtrStepi map, ushort2* st, int rows, int cols)
+    {
+        #if __CUDA_ARCH__ >= 120
+
+        __shared__ int smem[18][18];
+
+        const int j = blockIdx.x * 16 + threadIdx.x;
+        const int i = blockIdx.y * 16 + threadIdx.y;
+
+        const int tid = threadIdx.y * 16 + threadIdx.x;
+        const int lx = tid % 18;
+        const int ly = tid / 18;
+
+        if (ly < 14)
+            smem[ly][lx] = map.ptr(blockIdx.y * 16 + ly)[blockIdx.x * 16 + lx];
+
+        if (ly < 4 && blockIdx.y * 16 + ly + 14 <= rows && blockIdx.x * 16 + lx <= cols)
+            smem[ly + 14][lx] = map.ptr(blockIdx.y * 16 + ly + 14)[blockIdx.x * 16 + lx];
+
+        __syncthreads();
+
+        if (i < rows && j < cols)
+        {
+            int n;
+
+            #pragma unroll
+            for (int k = 0; k < 16; ++k)
+            {
+                n = 0;
+
+                if (smem[threadIdx.y + 1][threadIdx.x + 1] == 1)
+                {
+                    n += smem[threadIdx.y    ][threadIdx.x    ] == 2;
+                    n += smem[threadIdx.y    ][threadIdx.x + 1] == 2;
+                    n += smem[threadIdx.y    ][threadIdx.x + 2] == 2;
+
+                    n += smem[threadIdx.y + 1][threadIdx.x    ] == 2;
+                    n += smem[threadIdx.y + 1][threadIdx.x + 2] == 2;
+
+                    n += smem[threadIdx.y + 2][threadIdx.x    ] == 2;
+                    n += smem[threadIdx.y + 2][threadIdx.x + 1] == 2;
+                    n += smem[threadIdx.y + 2][threadIdx.x + 2] == 2;
+                }
+
+                if (n > 0)
+                    smem[threadIdx.y + 1][threadIdx.x + 1] = 2;
+            }
+
+            const int e = smem[threadIdx.y + 1][threadIdx.x + 1];
+
+            map.ptr(i + 1)[j + 1] = e;
+
+            n = 0;
+
+            if (e == 2)
+            {
+                n += smem[threadIdx.y    ][threadIdx.x    ] == 1;
+                n += smem[threadIdx.y    ][threadIdx.x + 1] == 1;
+                n += smem[threadIdx.y    ][threadIdx.x + 2] == 1;
+
+                n += smem[threadIdx.y + 1][threadIdx.x    ] == 1;
+                n += smem[threadIdx.y + 1][threadIdx.x + 2] == 1;
+
+                n += smem[threadIdx.y + 2][threadIdx.x    ] == 1;
+                n += smem[threadIdx.y + 2][threadIdx.x + 1] == 1;
+                n += smem[threadIdx.y + 2][threadIdx.x + 2] == 1;
+            }
+
+            if (n > 0)
+            {
+                const unsigned int ind = atomicInc(&counter, (unsigned int)(-1));
+                st[ind] = make_ushort2(j + 1, i + 1);
+            }
+        }
+
+        #endif
+    }
+
+    void edgesHysteresisLocal_gpu(PtrStepi map, ushort2* st1, int rows, int cols)
+    {
+        dim3 block(16, 16, 1);
+        dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
+
+        edgesHysteresisLocal<<<grid, block>>>(map, st1, rows, cols);
+        cudaSafeCall( cudaGetLastError() );
+
+        cudaSafeCall(cudaThreadSynchronize());
+    }
+
+    __constant__ int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1};
+    __constant__ int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1};
+
+    __global__ void edgesHysteresisGlobal(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols, int count)
+    {
+        #if __CUDA_ARCH__ >= 120
+
+        const int stack_size = 512;
+
+        __shared__ unsigned int s_counter;
+        __shared__ unsigned int s_ind;
+        __shared__ ushort2 s_st[stack_size];
+
+        if (threadIdx.x == 0)
+            s_counter = 0;
+        __syncthreads();
+
+        int ind = blockIdx.y * gridDim.x + blockIdx.x;
+
+        if (ind < count)
+        {
+            ushort2 pos = st1[ind];
+
+            if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows)
+            {
+                if (threadIdx.x < 8)
+                {
+                    pos.x += c_dx[threadIdx.x];
+                    pos.y += c_dy[threadIdx.x];
+
+                    if (map.ptr(pos.y)[pos.x] == 1)
+                    {
+                        map.ptr(pos.y)[pos.x] = 2;
+
+                        ind = atomicInc(&s_counter, (unsigned int)(-1));
+
+                        s_st[ind] = pos;
+                    }
+                }
+                __syncthreads();
+
+                while (s_counter > 0 && s_counter <= stack_size - blockDim.x)
+                {
+                    const int subTaskIdx = threadIdx.x >> 3;
+                    const int portion = min(s_counter, blockDim.x >> 3);
+
+                    pos.x = pos.y = 0;
+
+                    if (subTaskIdx < portion)
+                        pos = s_st[s_counter - 1 - subTaskIdx];
+                    __syncthreads();
+
+                    if (threadIdx.x == 0)
+                        s_counter -= portion;
+                    __syncthreads();
+
+                    if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows)
+                    {
+                        pos.x += c_dx[threadIdx.x & 7];
+                        pos.y += c_dy[threadIdx.x & 7];
+
+                        if (map.ptr(pos.y)[pos.x] == 1)
+                        {
+                            map.ptr(pos.y)[pos.x] = 2;
+
+                            ind = atomicInc(&s_counter, (unsigned int)(-1));
+
+                            s_st[ind] = pos;
+                        }
+                    }
+                    __syncthreads();
+                }
+
+                if (s_counter > 0)
+                {
+                    if (threadIdx.x == 0)
+                    {
+                        ind = atomicAdd(&counter, s_counter);
+                        s_ind = ind - s_counter;
+                    }
+                    __syncthreads();
+
+                    ind = s_ind;
+
+                    for (int i = threadIdx.x; i < s_counter; i += blockDim.x)
+                    {
+                        st2[ind + i] = s_st[i];
+                    }
+                }
+            }
+        }
+
+        #endif
+    }
+
+    void edgesHysteresisGlobal_gpu(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols)
+    {
+        void* counter_ptr;
+        cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, "cv::gpu::canny::counter") );
+
+        unsigned int count;
+        cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
+
+        while (count > 0)
+        {
+            cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) );
+
+            dim3 block(128, 1, 1);
+            dim3 grid(min(count, 65535u), divUp(count, 65535), 1);
+            edgesHysteresisGlobal<<<grid, block>>>(map, st1, st2, rows, cols, count);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall(cudaThreadSynchronize());
+
+            cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
+
+            swap(st1, st2);
+        }
+    }
+
+    __global__ void getEdges(PtrStepi map, PtrStep dst, int rows, int cols)
+    {
+        const int j = blockIdx.x * 16 + threadIdx.x;
+        const int i = blockIdx.y * 16 + threadIdx.y;
+
+        if (i < rows && j < cols)
+            dst.ptr(i)[j] = (uchar)(-(map.ptr(i + 1)[j + 1] >> 1));
+    }
+
+    void getEdges_gpu(PtrStepi map, PtrStep dst, int rows, int cols)
+    {
+        dim3 block(16, 16, 1);
+        dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
+
+        getEdges<<<grid, block>>>(map, dst, rows, cols);
+        cudaSafeCall( cudaGetLastError() );
+
+        cudaSafeCall(cudaThreadSynchronize());
+    }
+}}}
diff --git a/modules/gpu/src/imgproc_gpu.cpp b/modules/gpu/src/imgproc_gpu.cpp
index 504c6c8c8..479ad8e80 100644
--- a/modules/gpu/src/imgproc_gpu.cpp
+++ b/modules/gpu/src/imgproc_gpu.cpp
@@ -92,8 +92,13 @@ void cv::gpu::pyrDown(const GpuMat&, GpuMat&, PyrDownBuf&, Stream&) { throw_nogp
 void cv::gpu::pyrUp(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
 void cv::gpu::PyrUpBuf::create(Size, int) { throw_nogpu(); }
 void cv::gpu::pyrUp(const GpuMat&, GpuMat&, PyrUpBuf&, Stream&) { throw_nogpu(); }
-
-
+void cv::gpu::Canny(const GpuMat&, GpuMat&, double, double, int, bool) { throw_nogpu(); }
+void cv::gpu::Canny(const GpuMat&, CannyBuf&, GpuMat&, double, double, int, bool) { throw_nogpu(); }
+void cv::gpu::Canny(const GpuMat&, const GpuMat&, GpuMat&, double, double, bool) { throw_nogpu(); }
+void cv::gpu::Canny(const GpuMat&, const GpuMat&, CannyBuf&, GpuMat&, double, double, bool) { throw_nogpu(); }
+cv::gpu::CannyBuf::CannyBuf(const GpuMat&, const GpuMat&) { throw_nogpu(); }
+void cv::gpu::CannyBuf::create(const Size&, int) { throw_nogpu(); }
+void cv::gpu::CannyBuf::release() { throw_nogpu(); }
 
 #else /* !defined (HAVE_CUDA) */
 
@@ -1627,6 +1632,149 @@ void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, PyrUpBuf& buf, Stream& strea
     buf.filter->apply(buf.buf, dst, Rect(0, 0, buf.buf.cols, buf.buf.rows), stream);
 }
 
+
+//////////////////////////////////////////////////////////////////////////////
+// Canny
+
+cv::gpu::CannyBuf::CannyBuf(const GpuMat& dx_, const GpuMat& dy_) : dx(dx_), dy(dy_)
+{
+    CV_Assert(dx_.type() == CV_32SC1 && dy_.type() == CV_32SC1 && dx_.size() == dy_.size());
+
+    create(dx_.size(), -1);
+}
+
+void cv::gpu::CannyBuf::create(const Size& image_size, int apperture_size)
+{
+    ensureSizeIsEnough(image_size, CV_32SC1, dx);
+    ensureSizeIsEnough(image_size, CV_32SC1, dy);
+
+    if (apperture_size == 3)
+    {
+        ensureSizeIsEnough(image_size, CV_32SC1, dx_buf);
+        ensureSizeIsEnough(image_size, CV_32SC1, dy_buf);
+    }
+    else if(apperture_size > 0)
+    {
+        if (!filterDX)
+            filterDX = createDerivFilter_GPU(CV_8UC1, CV_32S, 1, 0, apperture_size, BORDER_REPLICATE);
+        if (!filterDY)
+            filterDY = createDerivFilter_GPU(CV_8UC1, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE);
+    }
+
+    ensureSizeIsEnough(image_size.height + 2, image_size.width + 2, CV_32FC1, edgeBuf);
+
+    ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf1);
+    ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf2);
+}
+
+void cv::gpu::CannyBuf::release()
+{
+    dx.release();
+    dy.release();
+    dx_buf.release();
+    dy_buf.release();
+    edgeBuf.release();
+    trackBuf1.release();
+    trackBuf2.release();
+}
+
+namespace cv { namespace gpu { namespace canny
+{
+    void calcSobelRowPass_gpu(PtrStep src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols);
+
+    void calcMagnitude_gpu(PtrStepi dx_buf, PtrStepi dy_buf, PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad);
+    void calcMagnitude_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad);
+
+    void calcMap_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, PtrStepi map, int rows, int cols, float low_thresh, float high_thresh);
+
+    void edgesHysteresisLocal_gpu(PtrStepi map, ushort2* st1, int rows, int cols);
+
+    void edgesHysteresisGlobal_gpu(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols);
+
+    void getEdges_gpu(PtrStepi map, PtrStep dst, int rows, int cols);
+}}}
+
+namespace
+{
+    void CannyCaller(CannyBuf& buf, GpuMat& dst, float low_thresh, float high_thresh)
+    {
+        using namespace cv::gpu::canny;
+
+        calcMap_gpu(buf.dx, buf.dy, buf.edgeBuf, buf.edgeBuf, dst.rows, dst.cols, low_thresh, high_thresh);
+
+        edgesHysteresisLocal_gpu(buf.edgeBuf, buf.trackBuf1.ptr<ushort2>(), dst.rows, dst.cols);
+
+        edgesHysteresisGlobal_gpu(buf.edgeBuf, buf.trackBuf1.ptr<ushort2>(), buf.trackBuf2.ptr<ushort2>(), dst.rows, dst.cols);
+
+        getEdges_gpu(buf.edgeBuf, dst, dst.rows, dst.cols);
+    }
+}
+
+void cv::gpu::Canny(const GpuMat& src, GpuMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
+{
+    CannyBuf buf(src.size(), apperture_size);
+    Canny(src, buf, dst, low_thresh, high_thresh, apperture_size, L2gradient);
+}
+
+void cv::gpu::Canny(const GpuMat& src, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
+{
+    using namespace cv::gpu::canny;
+
+    CV_Assert(src.type() == CV_8UC1);
+
+    if( low_thresh > high_thresh )
+        std::swap( low_thresh, high_thresh);
+
+    dst.create(src.size(), CV_8U);
+    dst.setTo(Scalar::all(0));
+
+    buf.create(src.size(), apperture_size);
+    buf.edgeBuf.setTo(Scalar::all(0));
+
+    if (apperture_size == 3)
+    {
+        calcSobelRowPass_gpu(src, buf.dx_buf, buf.dy_buf, src.rows, src.cols);
+
+        calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);
+    }
+    else
+    {
+        buf.filterDX->apply(src, buf.dx, Rect(0, 0, src.cols, src.rows));
+        buf.filterDY->apply(src, buf.dy, Rect(0, 0, src.cols, src.rows));
+
+        calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);
+    }
+
+    CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
+}
+
+void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& dst, double low_thresh, double high_thresh, bool L2gradient)
+{
+    CannyBuf buf(dx, dy);
+    Canny(dx, dy, buf, dst, low_thresh, high_thresh, L2gradient);
+}
+
+void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, bool L2gradient)
+{
+    using namespace cv::gpu::canny;
+
+    CV_Assert(dx.type() == CV_32SC1 && dy.type() == CV_32SC1 && dx.size() == dy.size());
+
+    if( low_thresh > high_thresh )
+        std::swap( low_thresh, high_thresh);
+
+    dst.create(dx.size(), CV_8U);
+    dst.setTo(Scalar::all(0));
+
+    buf.dx = dx; buf.dy = dy;
+    buf.create(dx.size(), -1);
+    buf.edgeBuf.setTo(Scalar::all(0));
+
+    calcMagnitude_gpu(dx, dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient);
+
+    CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
+}
+
 #endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpu/test/test_imgproc.cpp b/modules/gpu/test/test_imgproc.cpp
index 3839aa64b..407d3be94 100644
--- a/modules/gpu/test/test_imgproc.cpp
+++ b/modules/gpu/test/test_imgproc.cpp
@@ -2217,4 +2217,71 @@ TEST_P(PyrUp, Accuracy)
 
 INSTANTIATE_TEST_CASE_P(ImgProc, PyrUp, testing::ValuesIn(devices()));
 
+////////////////////////////////////////////////////////
+// Canny
+
+struct Canny : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, bool> >
+{
+    static cv::Mat img;
+
+    static void SetUpTestCase()
+    {
+        img = readImage("stereobm/aloe-L.png", CV_LOAD_IMAGE_GRAYSCALE);
+    }
+
+    static void TearDownTestCase()
+    {
+        img.release();
+    }
+
+    cv::gpu::DeviceInfo devInfo;
+    int apperture_size;
+    bool L2gradient;
+
+    double low_thresh;
+    double high_thresh;
+
+    cv::Mat edges_gold;
+
+    virtual void SetUp()
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        apperture_size = std::tr1::get<1>(GetParam());
+        L2gradient = std::tr1::get<2>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        low_thresh = 50.0;
+        high_thresh = 100.0;
+
+        cv::Canny(img, edges_gold, low_thresh, high_thresh, apperture_size, L2gradient);
+    }
+};
+
+cv::Mat Canny::img;
+
+TEST_P(Canny, Accuracy)
+{
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(apperture_size);
+    PRINT_PARAM(L2gradient);
+
+    cv::Mat edges;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat d_edges;
+
+        cv::gpu::Canny(cv::gpu::GpuMat(img), d_edges, low_thresh, high_thresh, apperture_size, L2gradient);
+
+        d_edges.download(edges);
+    );
+
+    EXPECT_MAT_SIMILAR(edges_gold, edges, 1.0);
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Canny, testing::Combine(
+                        testing::ValuesIn(devices()),
+                        testing::Values(3, 5),
+                        testing::Values(false, true)));
+
 #endif // HAVE_CUDA
diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp
index a101191ca..ef2e2b7a6 100644
--- a/samples/gpu/performance/tests.cpp
+++ b/samples/gpu/performance/tests.cpp
@@ -937,3 +937,25 @@ TEST(equalizeHist)
         GPU_OFF;
     }
 }
+
+
+TEST(Canny)
+{
+    Mat img = imread(abspath("aloeL.jpg"), CV_LOAD_IMAGE_GRAYSCALE);
+
+    if (img.empty()) throw runtime_error("can't open aloeL.jpg");
+
+    Mat edges(img.size(), CV_8UC1);
+
+    CPU_ON;
+    Canny(img, edges, 50.0, 100.0);
+    CPU_OFF;
+
+    gpu::GpuMat d_img(img);
+    gpu::GpuMat d_edges(img.size(), CV_8UC1);
+    gpu::CannyBuf d_buf(img.size());
+
+    GPU_ON;
+    gpu::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
+    GPU_OFF;
+}
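
For reference, below is a minimal usage sketch of the API this patch adds. It is not part of the patch itself; it assumes an OpenCV 2.x build with the CUDA-enabled gpu module, and "input.png" / "edges.png" are placeholder file names.

// Usage sketch (not part of the patch). Assumes OpenCV 2.x with HAVE_CUDA;
// the input image name is a placeholder for any 8-bit grayscale image.
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/gpu/gpu.hpp>

int main()
{
    cv::Mat img = cv::imread("input.png", CV_LOAD_IMAGE_GRAYSCALE);
    CV_Assert(!img.empty());

    cv::gpu::GpuMat d_img(img), d_edges;

    // One-shot call: intermediate buffers are allocated inside the function.
    cv::gpu::Canny(d_img, d_edges, 50.0, 100.0, 3, false);

    // Reusing a CannyBuf avoids reallocating the intermediates when many
    // images of the same size are processed (as in samples/gpu/performance).
    cv::gpu::CannyBuf buf(d_img.size(), 3);
    for (int i = 0; i < 100; ++i)
        cv::gpu::Canny(d_img, buf, d_edges, 50.0, 100.0, 3, false);

    cv::Mat edges;
    d_edges.download(edges);
    cv::imwrite("edges.png", edges);
    return 0;
}

The dx/dy overloads follow the same pattern, but take precomputed CV_32SC1 derivative maps instead of the source image.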