diff --git a/modules/core/include/opencv2/core/gpumat.hpp b/modules/core/include/opencv2/core/gpumat.hpp index ef86c5a20..989335925 100644 --- a/modules/core/include/opencv2/core/gpumat.hpp +++ b/modules/core/include/opencv2/core/gpumat.hpp @@ -112,6 +112,8 @@ namespace cv { namespace gpu int multiProcessorCount() const { return multi_processor_count_; } + size_t sharedMemPerBlock() const { return sharedMemPerBlock_; } + size_t freeMemory() const; size_t totalMemory() const; @@ -133,6 +135,7 @@ namespace cv { namespace gpu int multi_processor_count_; int majorVersion_; int minorVersion_; + size_t sharedMemPerBlock_; }; CV_EXPORTS void printCudaDeviceInfo(int device); diff --git a/modules/core/src/gpumat.cpp b/modules/core/src/gpumat.cpp index 25a3e7699..c901bf492 100644 --- a/modules/core/src/gpumat.cpp +++ b/modules/core/src/gpumat.cpp @@ -42,7 +42,6 @@ #include "precomp.hpp" #include "opencv2/core/gpumat.hpp" - #include #ifdef HAVE_CUDA @@ -301,6 +300,7 @@ void cv::gpu::DeviceInfo::query() multi_processor_count_ = prop.multiProcessorCount; majorVersion_ = prop.major; minorVersion_ = prop.minor; + sharedMemPerBlock_ = prop.sharedMemPerBlock; } void cv::gpu::DeviceInfo::queryMemory(size_t& free_memory, size_t& total_memory) const diff --git a/modules/features2d/src/matchers.cpp b/modules/features2d/src/matchers.cpp index d2f71d003..091feaaa8 100755 --- a/modules/features2d/src/matchers.cpp +++ b/modules/features2d/src/matchers.cpp @@ -270,7 +270,7 @@ void DescriptorMatcher::knnMatch( const Mat& queryDescriptors, vector 0 ); - + checkMasks( masks, queryDescriptors.rows ); train(); @@ -285,7 +285,7 @@ void DescriptorMatcher::radiusMatch( const Mat& queryDescriptors, vector std::numeric_limits::epsilon() ); - + checkMasks( masks, queryDescriptors.rows ); train(); @@ -315,9 +315,9 @@ bool DescriptorMatcher::isMaskedOut( const vector& masks, int queryIdx ) return !masks.empty() && outCount == masks.size() ; } - + /////////////////////////////////////////////////////////////////////////////////////////////////////// - + BFMatcher::BFMatcher( int _normType, bool _crossCheck ) { normType = _normType; @@ -342,24 +342,24 @@ void BFMatcher::knnMatchImpl( const Mat& queryDescriptors, vector { const int IMGIDX_SHIFT = 18; const int IMGIDX_ONE = (1 << IMGIDX_SHIFT); - + if( queryDescriptors.empty() || trainDescCollection.empty() ) { matches.clear(); return; } CV_Assert( queryDescriptors.type() == trainDescCollection[0].type() ); - + matches.reserve(queryDescriptors.rows); - + Mat dist, nidx; - + int iIdx, imgCount = (int)trainDescCollection.size(), update = 0; int dtype = normType == NORM_HAMMING || normType == NORM_HAMMING2 || (normType == NORM_L1 && queryDescriptors.type() == CV_8U) ? CV_32S : CV_32F; - + CV_Assert( (int64)imgCount*IMGIDX_ONE < INT_MAX ); - + for( iIdx = 0; iIdx < imgCount; iIdx++ ) { CV_Assert( trainDescCollection[iIdx].rows < IMGIDX_ONE ); @@ -367,23 +367,23 @@ void BFMatcher::knnMatchImpl( const Mat& queryDescriptors, vector normType, knn, masks.empty() ? 
Mat() : masks[iIdx], update, crossCheck); update += IMGIDX_ONE; } - + if( dtype == CV_32S ) { Mat temp; dist.convertTo(temp, CV_32F); dist = temp; } - + for( int qIdx = 0; qIdx < queryDescriptors.rows; qIdx++ ) { const float* distptr = dist.ptr(qIdx); const int* nidxptr = nidx.ptr(qIdx); - + matches.push_back( vector() ); vector& mq = matches.back(); mq.reserve(knn); - + for( int k = 0; k < nidx.cols; k++ ) { if( nidxptr[k] < 0 ) @@ -391,13 +391,13 @@ void BFMatcher::knnMatchImpl( const Mat& queryDescriptors, vector mq.push_back( DMatch(qIdx, nidxptr[k] & (IMGIDX_ONE - 1), nidxptr[k] >> IMGIDX_SHIFT, distptr[k]) ); } - + if( mq.empty() && compactResult ) matches.pop_back(); } } - + void BFMatcher::radiusMatchImpl( const Mat& queryDescriptors, vector >& matches, float maxDistance, const vector& masks, bool compactResult ) { @@ -407,14 +407,14 @@ void BFMatcher::radiusMatchImpl( const Mat& queryDescriptors, vector(qIdx); - + const float* distptr = distf.ptr(qIdx); + vector& mq = matches[qIdx]; - for( int k = 0; k < dist.cols; k++ ) + for( int k = 0; k < distf.cols; k++ ) { if( distptr[k] <= maxDistance ) mq.push_back( DMatch(qIdx, k, iIdx, distptr[k]) ); } } } - + int qIdx0 = 0; for( int qIdx = 0; qIdx < queryDescriptors.rows; qIdx++ ) { if( matches[qIdx].empty() && compactResult ) continue; - + if( qIdx0 < qIdx ) std::swap(matches[qIdx], matches[qIdx0]); - + std::sort( matches[qIdx0].begin(), matches[qIdx0].end() ); qIdx0++; } } - + /////////////////////////////////////////////////////////////////////////////////////////////////////// - + /* * Factory function for DescriptorMatcher creating */ @@ -1025,7 +1025,7 @@ void GenericDescriptorMatcher::knnMatch( const Mat& queryImage, vector KeyPointsFilter::runByImageBorder( queryKeypoints, queryImage.size(), 0 ); KeyPointsFilter::runByKeypointSize( queryKeypoints, std::numeric_limits::epsilon() ); - + train(); knnMatchImpl( queryImage, queryKeypoints, matches, knn, masks, compactResult ); } @@ -1041,7 +1041,7 @@ void GenericDescriptorMatcher::radiusMatch( const Mat& queryImage, vector::epsilon() ); - + train(); radiusMatchImpl( queryImage, queryKeypoints, matches, maxDistance, masks, compactResult ); } @@ -1065,7 +1065,7 @@ Ptr GenericDescriptorMatcher::create( const string& ge { Ptr descriptorMatcher = Algorithm::create("DescriptorMatcher." + genericDescritptorMatcherType); - + if( !paramsFilename.empty() && !descriptorMatcher.empty() ) { FileStorage fs = FileStorage( paramsFilename, FileStorage::READ ); diff --git a/modules/gpu/doc/image_processing.rst b/modules/gpu/doc/image_processing.rst index 1ad421c90..eb2561ec4 100644 --- a/modules/gpu/doc/image_processing.rst +++ b/modules/gpu/doc/image_processing.rst @@ -885,3 +885,94 @@ Finds edges in an image using the [Canny86]_ algorithm. .. seealso:: :ocv:func:`Canny` + + +gpu::HoughLines +--------------- +Finds lines in a binary image using the classical Hough transform. + +.. ocv:function:: void gpu::HoughLines(const GpuMat& src, GpuMat& lines, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096) + +.. ocv:function:: void gpu::HoughLines(const GpuMat& src, GpuMat& lines, GpuMat& accum, GpuMat& buf, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096) + + :param src: 8-bit, single-channel binary source image. + + :param lines: Output vector of lines. Each line is represented by a two-element vector :math:`(\rho, \theta)` . :math:`\rho` is the distance from the coordinate origin :math:`(0,0)` (top-left corner of the image). 
:math:`\theta` is the line rotation angle in radians ( :math:`0 \sim \textrm{vertical line}, \pi/2 \sim \textrm{horizontal line}` ). + + :param rho: Distance resolution of the accumulator in pixels. + + :param theta: Angle resolution of the accumulator in radians. + + :param threshold: Accumulator threshold parameter. Only those lines are returned that get enough votes ( :math:`>\texttt{threshold}` ). + + :param doSort: If true, the output lines are sorted by their number of votes, in descending order. + + :param maxLines: Maximum number of output lines. + + :param accum: Optional buffer for the accumulator to avoid extra memory allocations (for many calls with the same sizes). + + :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes). + +.. seealso:: :ocv:func:`HoughLines` + + + +gpu::HoughLinesTransform +------------------------ +Performs the classical Hough transform for line detection. + +.. ocv:function:: void gpu::HoughLinesTransform(const GpuMat& src, GpuMat& accum, GpuMat& buf, float rho, float theta) + + :param src: 8-bit, single-channel binary source image. + + :param accum: Output accumulator array. + + :param buf: Buffer to avoid extra memory allocations (for many calls with the same sizes). + + :param rho: Distance resolution of the accumulator in pixels. + + :param theta: Angle resolution of the accumulator in radians. + +.. seealso:: :ocv:func:`gpu::HoughLines` + + + +gpu::HoughLinesGet +------------------ +Finds lines in the Hough accumulator produced by :ocv:func:`gpu::HoughLinesTransform`. + +.. ocv:function:: void gpu::HoughLinesGet(const GpuMat& accum, GpuMat& lines, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096) + + :param accum: Accumulator array. + + :param lines: Output vector of lines. Each line is represented by a two-element vector :math:`(\rho, \theta)` . :math:`\rho` is the distance from the coordinate origin :math:`(0,0)` (top-left corner of the image). :math:`\theta` is the line rotation angle in radians ( :math:`0 \sim \textrm{vertical line}, \pi/2 \sim \textrm{horizontal line}` ). + + :param rho: Distance resolution of the accumulator in pixels. It must be the same value that was passed to :ocv:func:`gpu::HoughLinesTransform`. + + :param theta: Angle resolution of the accumulator in radians. It must be the same value that was passed to :ocv:func:`gpu::HoughLinesTransform`. + + :param threshold: Accumulator threshold parameter. Only those lines are returned that get enough votes ( :math:`>\texttt{threshold}` ). + + :param doSort: If true, the output lines are sorted by their number of votes, in descending order. + + :param maxLines: Maximum number of output lines. + +.. seealso:: :ocv:func:`gpu::HoughLines` + + + +gpu::HoughLinesDownload +----------------------- +Downloads results from :ocv:func:`gpu::HoughLines` to host memory. + +.. ocv:function:: void gpu::HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines, OutputArray h_votes = noArray()) + + :param d_lines: Result of :ocv:func:`gpu::HoughLines` . + + :param h_lines: Output host array of lines. + + :param h_votes: Optional output array for the lines' votes. +
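A minimal usage sketch is shown below (the image file name, Canny thresholds, and accumulator threshold are illustrative values, not prescribed defaults); it runs the GPU detector and downloads the :math:`(\rho, \theta)` pairs to the host: ::

    #include <vector>
    #include "opencv2/core/core.hpp"
    #include "opencv2/highgui/highgui.hpp"
    #include "opencv2/imgproc/imgproc.hpp"
    #include "opencv2/gpu/gpu.hpp"

    int main()
    {
        cv::Mat img = cv::imread("building.png", cv::IMREAD_GRAYSCALE);
        cv::Mat edges;
        cv::Canny(img, edges, 50, 200);                   // 8-bit, single-channel binary edge map

        cv::gpu::GpuMat d_src(edges);                     // upload to the device
        cv::gpu::GpuMat d_lines;
        cv::gpu::HoughLines(d_src, d_lines, 1.0f, static_cast<float>(CV_PI / 180.0), 100);

        std::vector<cv::Vec2f> lines;                     // (rho, theta) pairs on the host
        cv::gpu::HoughLinesDownload(d_lines, lines);
        return 0;
    }

+..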
seealso:: :ocv:func:`gpu::HoughLines` diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index ca9ad8988..a7f0ab32d 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -820,6 +820,12 @@ private: int nLayers_; }; +CV_EXPORTS void HoughLines(const GpuMat& src, GpuMat& lines, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096); +CV_EXPORTS void HoughLines(const GpuMat& src, GpuMat& lines, GpuMat& accum, GpuMat& buf, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096); +CV_EXPORTS void HoughLinesTransform(const GpuMat& src, GpuMat& accum, GpuMat& buf, float rho, float theta); +CV_EXPORTS void HoughLinesGet(const GpuMat& accum, GpuMat& lines, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096); +CV_EXPORTS void HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines, OutputArray h_votes = noArray()); + ////////////////////////////// Matrix reductions ////////////////////////////// //! computes mean value and standard deviation of all or selected array elements diff --git a/modules/gpu/perf/perf_imgproc.cpp b/modules/gpu/perf/perf_imgproc.cpp index b5c986d22..0dbcd34c6 100644 --- a/modules/gpu/perf/perf_imgproc.cpp +++ b/modules/gpu/perf/perf_imgproc.cpp @@ -1331,4 +1331,51 @@ INSTANTIATE_TEST_CASE_P(ImgProc, ImagePyramid_getLayer, testing::Combine( MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)))); +////////////////////////////////////////////////////////////////////// +// HoughLines + +IMPLEMENT_PARAM_CLASS(DoSort, bool) + +GPU_PERF_TEST(HoughLines, cv::gpu::DeviceInfo, cv::Size, DoSort) +{ + declare.time(30.0); + + const cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + const cv::Size size = GET_PARAM(1); + const bool doSort = GET_PARAM(2); + + const float rho = 1.0f; + const float theta = CV_PI / 180.0f; + const int threshold = 300; + + cv::RNG rng(123456789); + + cv::Mat src(size, CV_8UC1, cv::Scalar::all(0)); + + const int numLines = rng.uniform(500, 2000); + for (int i = 0; i < numLines; ++i) + { + cv::Point p1(rng.uniform(0, src.cols), rng.uniform(0, src.rows)); + cv::Point p2(rng.uniform(0, src.cols), rng.uniform(0, src.rows)); + cv::line(src, p1, p2, cv::Scalar::all(255), 2); + } + + cv::gpu::GpuMat d_src(src); + cv::gpu::GpuMat d_lines; + cv::gpu::GpuMat d_accum; + cv::gpu::GpuMat d_buf; + cv::gpu::HoughLines(d_src, d_lines, d_accum, d_buf, rho, theta, threshold, doSort); + + TEST_CYCLE() + { + cv::gpu::HoughLines(d_src, d_lines, d_accum, d_buf, rho, theta, threshold, doSort); + } +} + +INSTANTIATE_TEST_CASE_P(ImgProc, HoughLines, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(DoSort(false), DoSort(true)))); + #endif diff --git a/modules/gpu/perf_cpu/perf_imgproc.cpp b/modules/gpu/perf_cpu/perf_imgproc.cpp index b6686b7ed..1b3c0951c 100644 --- a/modules/gpu/perf_cpu/perf_imgproc.cpp +++ b/modules/gpu/perf_cpu/perf_imgproc.cpp @@ -727,4 +727,45 @@ INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor, testing::Combine( CvtColorInfo(1, 3, cv::COLOR_BayerGR2BGR), CvtColorInfo(4, 4, cv::COLOR_RGBA2mRGBA)))); +////////////////////////////////////////////////////////////////////// +// HoughLines + +IMPLEMENT_PARAM_CLASS(DoSort, bool) + +GPU_PERF_TEST(HoughLines, cv::gpu::DeviceInfo, cv::Size, DoSort) +{ + declare.time(30.0); + + const cv::Size size = GET_PARAM(1); + + const float rho = 
1.0f; + const float theta = CV_PI / 180.0f; + const int threshold = 300; + + cv::RNG rng(123456789); + + cv::Mat src(size, CV_8UC1, cv::Scalar::all(0)); + + const int numLines = rng.uniform(500, 2000); + for (int i = 0; i < numLines; ++i) + { + cv::Point p1(rng.uniform(0, src.cols), rng.uniform(0, src.rows)); + cv::Point p2(rng.uniform(0, src.cols), rng.uniform(0, src.rows)); + cv::line(src, p1, p2, cv::Scalar::all(255), 2); + } + + std::vector<cv::Vec2f> lines; + cv::HoughLines(src, lines, rho, theta, threshold); + + TEST_CYCLE() + { + cv::HoughLines(src, lines, rho, theta, threshold); + } +} + +INSTANTIATE_TEST_CASE_P(ImgProc, HoughLines, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(DoSort(false), DoSort(true)))); + #endif diff --git a/modules/gpu/src/cuda/hough.cu b/modules/gpu/src/cuda/hough.cu new file mode 100644 index 000000000..388223e31 --- /dev/null +++ b/modules/gpu/src/cuda/hough.cu @@ -0,0 +1,295 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage.
+// +//M*/ + +#include <thrust/sort.h> +#include "opencv2/gpu/device/common.hpp" +#include "opencv2/gpu/device/emulation.hpp" + +namespace cv { namespace gpu { namespace device +{ + namespace hough + { + __device__ int g_counter; + + //////////////////////////////////////////////////////////////////////// + // buildPointList + + const int PIXELS_PER_THREAD = 16; + + __global__ void buildPointList(const DevMem2Db src, unsigned int* list) + { + __shared__ int s_queues[4][32 * PIXELS_PER_THREAD]; + __shared__ int s_qsize[4]; + __shared__ int s_start[4]; + + const int x = blockIdx.x * blockDim.x * PIXELS_PER_THREAD + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (y >= src.rows) + return; + + if (threadIdx.x == 0) + s_qsize[threadIdx.y] = 0; + + __syncthreads(); + + // fill the queue + for (int i = 0, xx = x; i < PIXELS_PER_THREAD && xx < src.cols; ++i, xx += blockDim.x) + { + if (src(y, xx)) + { + const unsigned int val = (y << 16) | xx; + const int qidx = Emulation::smem::atomicAdd(&s_qsize[threadIdx.y], 1); + s_queues[threadIdx.y][qidx] = val; + } + } + + __syncthreads(); + + // let one thread reserve the space required in the global list + if (threadIdx.x == 0 && threadIdx.y == 0) + { + // find how many items are stored in each list + int total_size = 0; + for (int i = 0; i < blockDim.y; ++i) + { + s_start[i] = total_size; + total_size += s_qsize[i]; + } + + // calculate the offset in the global list + const int global_offset = atomicAdd(&g_counter, total_size); + for (int i = 0; i < blockDim.y; ++i) + s_start[i] += global_offset; + } + + __syncthreads(); + + // copy local queues to global queue + const int qsize = s_qsize[threadIdx.y]; + for(int i = threadIdx.x; i < qsize; i += blockDim.x) + { + const unsigned int val = s_queues[threadIdx.y][i]; + list[s_start[threadIdx.y] + i] = val; + } + } + + int buildPointList_gpu(DevMem2Db src, unsigned int* list) + { + void* counter_ptr; + cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) ); + + cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) ); + + const dim3 block(32, 4); + const dim3 grid(divUp(src.cols, block.x * PIXELS_PER_THREAD), divUp(src.rows, block.y)); + + cudaSafeCall( cudaFuncSetCacheConfig(buildPointList, cudaFuncCachePreferShared) ); + + buildPointList<<<grid, block>>>(src, list); + cudaSafeCall( cudaGetLastError() ); + + cudaSafeCall( cudaDeviceSynchronize() ); + + int total_count; + cudaSafeCall( cudaMemcpy(&total_count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) ); + + return total_count; + } + + //////////////////////////////////////////////////////////////////////// + // linesAccum + + __global__ void linesAccumGlobal(const unsigned int* list, const int count, PtrStepi accum, const float irho, const float theta, const int numrho) + { + const int n = blockIdx.x; + const float ang = n * theta; + + float sin_ang; + float cos_ang; + sincosf(ang, &sin_ang, &cos_ang); + + const float tabSin = sin_ang * irho; + const float tabCos = cos_ang * irho; + + for (int i = threadIdx.x; i < count; i += blockDim.x) + { + const unsigned int qvalue = list[i]; + + const int x = (qvalue & 0x0000FFFF); + const int y = (qvalue >> 16) & 0x0000FFFF; + + int r = __float2int_rn(x * tabCos + y * tabSin); + r += (numrho - 1) / 2; + + ::atomicAdd(accum.ptr(n + 1) + r + 1, 1); + } + } + + __global__ void linesAccumShared(const unsigned int* list, const int count, PtrStepi accum, const float irho, const float theta, const int numrho) + { + extern __shared__ int smem[]; + + for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x) +
smem[i] = 0; + + __syncthreads(); + + const int n = blockIdx.x; + const float ang = n * theta; + + float sin_ang; + float cos_ang; + sincosf(ang, &sin_ang, &cos_ang); + + const float tabSin = sin_ang * irho; + const float tabCos = cos_ang * irho; + + for (int i = threadIdx.x; i < count; i += blockDim.x) + { + const unsigned int qvalue = list[i]; + + const int x = (qvalue & 0x0000FFFF); + const int y = (qvalue >> 16) & 0x0000FFFF; + + int r = __float2int_rn(x * tabCos + y * tabSin); + r += (numrho - 1) / 2; + + Emulation::smem::atomicAdd(&smem[r + 1], 1); + } + + __syncthreads(); + + for (int i = threadIdx.x; i < numrho; i += blockDim.x) + accum(n + 1, i) = smem[i]; + } + + void linesAccum_gpu(const unsigned int* list, int count, DevMem2Di accum, float rho, float theta, size_t sharedMemPerBlock) + { + const dim3 block(1024); + const dim3 grid(accum.rows - 2); + + cudaSafeCall( cudaFuncSetCacheConfig(linesAccumShared, cudaFuncCachePreferShared) ); + + size_t smemSize = (accum.cols - 1) * sizeof(int); + + if (smemSize < sharedMemPerBlock - 1000) + linesAccumShared<<<grid, block, smemSize>>>(list, count, accum, 1.0f / rho, theta, accum.cols - 2); + else + linesAccumGlobal<<<grid, block>>>(list, count, accum, 1.0f / rho, theta, accum.cols - 2); + + cudaSafeCall( cudaGetLastError() ); + + cudaSafeCall( cudaDeviceSynchronize() ); + } + + //////////////////////////////////////////////////////////////////////// + // linesGetResult + + __global__ void linesGetResult(const DevMem2Di accum, float2* out, int* votes, const int maxSize, const float threshold, const float theta, const float rho, const int numrho) + { + __shared__ int smem[8][32]; + + int r = blockIdx.x * (blockDim.x - 2) + threadIdx.x; + int n = blockIdx.y * (blockDim.y - 2) + threadIdx.y; + + if (r >= accum.cols || n >= accum.rows) + return; + + smem[threadIdx.y][threadIdx.x] = accum(n, r); + __syncthreads(); + + r -= 1; + n -= 1; + + if (threadIdx.x == 0 || threadIdx.x == blockDim.x - 1 || threadIdx.y == 0 || threadIdx.y == blockDim.y - 1 || r >= accum.cols - 2 || n >= accum.rows - 2) + return; + + if (smem[threadIdx.y][threadIdx.x] > threshold && + smem[threadIdx.y][threadIdx.x] > smem[threadIdx.y - 1][threadIdx.x] && + smem[threadIdx.y][threadIdx.x] >= smem[threadIdx.y + 1][threadIdx.x] && + smem[threadIdx.y][threadIdx.x] > smem[threadIdx.y][threadIdx.x - 1] && + smem[threadIdx.y][threadIdx.x] >= smem[threadIdx.y][threadIdx.x + 1]) + { + const float radius = (r - (numrho - 1) * 0.5f) * rho; + const float angle = n * theta; + + const int ind = ::atomicAdd(&g_counter, 1); + if (ind < maxSize) + { + out[ind] = make_float2(radius, angle); + votes[ind] = smem[threadIdx.y][threadIdx.x]; + } + } + } + + int linesGetResult_gpu(DevMem2Di accum, float2* out, int* votes, int maxSize, float rho, float theta, float threshold, bool doSort) + { + void* counter_ptr; + cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) ); + + cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) ); + + const dim3 block(32, 8); + const dim3 grid(divUp(accum.cols, block.x - 2), divUp(accum.rows, block.y - 2)); + + linesGetResult<<<grid, block>>>(accum, out, votes, maxSize, threshold, theta, rho, accum.cols - 2); + cudaSafeCall( cudaGetLastError() ); + + cudaSafeCall( cudaDeviceSynchronize() ); + + int total_count; + cudaSafeCall( cudaMemcpy(&total_count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) ); + + total_count = ::min(total_count, maxSize); + + if (doSort && total_count > 0) + { + thrust::device_ptr<float2> out_ptr(out); + thrust::device_ptr<int> votes_ptr(votes); + thrust::sort_by_key(votes_ptr, votes_ptr +
total_count, out_ptr, thrust::greater()); + } + + return total_count; + } + } +}}} diff --git a/modules/gpu/src/hough.cpp b/modules/gpu/src/hough.cpp new file mode 100644 index 000000000..71d8ac07f --- /dev/null +++ b/modules/gpu/src/hough.cpp @@ -0,0 +1,144 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "precomp.hpp" + +#if !defined (HAVE_CUDA) + +void cv::gpu::HoughLinesTransform(const GpuMat&, GpuMat&, GpuMat&, float, float) { throw_nogpu(); } +void cv::gpu::HoughLinesGet(const GpuMat&, GpuMat&, float, float, int, bool, int) { throw_nogpu(); } +void cv::gpu::HoughLines(const GpuMat&, GpuMat&, float, float, int, bool, int) { throw_nogpu(); } +void cv::gpu::HoughLines(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, float, float, int, bool, int) { throw_nogpu(); } +void cv::gpu::HoughLinesDownload(const GpuMat&, OutputArray, OutputArray) { throw_nogpu(); } + +#else /* !defined (HAVE_CUDA) */ + +namespace cv { namespace gpu { namespace device +{ + namespace hough + { + int buildPointList_gpu(DevMem2Db src, unsigned int* list); + void linesAccum_gpu(const unsigned int* list, int count, DevMem2Di accum, float rho, float theta, size_t sharedMemPerBlock); + int linesGetResult_gpu(DevMem2Di accum, float2* out, int* votes, int maxSize, float rho, float theta, float threshold, bool doSort); + } +}}} + +void cv::gpu::HoughLinesTransform(const GpuMat& src, GpuMat& accum, GpuMat& buf, float rho, float theta) +{ + using namespace cv::gpu::device::hough; + + CV_Assert(src.type() == CV_8UC1); + CV_Assert(src.cols < std::numeric_limits<unsigned short>::max()); + CV_Assert(src.rows < std::numeric_limits<unsigned short>::max()); + + ensureSizeIsEnough(1, src.size().area(), CV_32SC1, buf); + + const int count = buildPointList_gpu(src, buf.ptr<unsigned int>()); + + const int numangle = cvRound(CV_PI / theta); + const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho); + + CV_Assert(numangle > 0 && numrho > 0); + + ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, accum); + accum.setTo(cv::Scalar::all(0)); + + cv::gpu::DeviceInfo devInfo; + + if (count > 0) + linesAccum_gpu(buf.ptr<unsigned int>(), count, accum, rho, theta, devInfo.sharedMemPerBlock()); +} + +void cv::gpu::HoughLinesGet(const GpuMat& accum, GpuMat& lines, float rho, float theta, int threshold, bool doSort, int maxLines) +{ + using namespace cv::gpu::device; + + CV_Assert(accum.type() == CV_32SC1); + + ensureSizeIsEnough(2, maxLines, CV_32FC2, lines); + + int count = hough::linesGetResult_gpu(accum, lines.ptr<float2>(0), lines.ptr<int>(1), maxLines, rho, theta, threshold, doSort); + + if (count > 0) + lines.cols = count; + else + lines.release(); +} + +void cv::gpu::HoughLines(const GpuMat& src, GpuMat& lines, float rho, float theta, int threshold, bool doSort, int maxLines) +{ + cv::gpu::GpuMat accum, buf; + HoughLines(src, lines, accum, buf, rho, theta, threshold, doSort, maxLines); +} + +void cv::gpu::HoughLines(const GpuMat& src, GpuMat& lines, GpuMat& accum, GpuMat& buf, float rho, float theta, int threshold, bool doSort, int maxLines) +{ + HoughLinesTransform(src, accum, buf, rho, theta); + HoughLinesGet(accum, lines, rho, theta, threshold, doSort, maxLines); +} + +void cv::gpu::HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines_, OutputArray h_votes_) +{ + if (d_lines.empty()) + { + h_lines_.release(); + if (h_votes_.needed()) + h_votes_.release(); + return; + } + + CV_Assert(d_lines.rows == 2 && d_lines.type() == CV_32FC2); + + h_lines_.create(1, d_lines.cols, CV_32FC2); + cv::Mat h_lines = h_lines_.getMat(); + d_lines.row(0).download(h_lines); + + if (h_votes_.needed()) + { + h_votes_.create(1, d_lines.cols, CV_32SC1); + cv::Mat h_votes = h_votes_.getMat(); + cv::gpu::GpuMat d_votes(1, d_lines.cols, CV_32SC1, const_cast<int*>(d_lines.ptr<int>(1))); + d_votes.download(h_votes); + } +} + +#endif /* !defined (HAVE_CUDA) */
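For reference, a sketch of how the split entry points above (HoughLinesTransform followed by HoughLinesGet) allow the accumulator and point-list buffers to be reused across a sequence of frames; the helper function and variable names here are illustrative, not part of the patch:

    #include <vector>
    #include "opencv2/core/core.hpp"
    #include "opencv2/gpu/gpu.hpp"

    void detectLinesPerFrame(const std::vector<cv::Mat>& edgeMaps, std::vector<std::vector<cv::Vec2f> >& allLines)
    {
        const float rho = 1.0f;
        const float theta = static_cast<float>(CV_PI / 180.0);
        const int threshold = 100;                  // illustrative accumulator threshold

        cv::gpu::GpuMat d_src, d_lines;
        cv::gpu::GpuMat d_accum, d_buf;             // reused between calls, so they are allocated only once

        allLines.resize(edgeMaps.size());
        for (size_t i = 0; i < edgeMaps.size(); ++i)
        {
            d_src.upload(edgeMaps[i]);              // 8-bit, single-channel binary edge map

            // voting stage: builds the point list in d_buf and fills the accumulator d_accum
            cv::gpu::HoughLinesTransform(d_src, d_accum, d_buf, rho, theta);

            // peak extraction: (rho, theta) pairs, sorted by votes because doSort == true
            cv::gpu::HoughLinesGet(d_accum, d_lines, rho, theta, threshold, true);

            cv::gpu::HoughLinesDownload(d_lines, allLines[i]);
        }
    }
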
diff --git a/modules/gpu/src/opencv2/gpu/device/emulation.hpp b/modules/gpu/src/opencv2/gpu/device/emulation.hpp index e116c50ad..1a6f5794c 100644 --- a/modules/gpu/src/opencv2/gpu/device/emulation.hpp +++ b/modules/gpu/src/opencv2/gpu/device/emulation.hpp @@ -99,7 +99,7 @@ namespace cv { namespace gpu { namespace device } template <typename T> - static __device__ __forceinline__ void atomicAdd(T* address, T val) + static __device__ __forceinline__ T atomicAdd(T* address, T val) { #if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120) T count; @@ -110,8 +110,10 @@ namespace cv { namespace gpu { namespace device count = tag | (count + val); *address = count; } while (*address != count); + + return (count & TAG_MASK) - val; #else - ::atomicAdd(address, val); + return ::atomicAdd(address, val); #endif } @@ -134,4 +136,4 @@ namespace cv { namespace gpu { namespace device }; }}} // namespace cv { namespace gpu { namespace device -#endif /* OPENCV_GPU_EMULATION_HPP_ */ \ No newline at end of file +#endif /* OPENCV_GPU_EMULATION_HPP_ */ diff --git a/modules/gpu/test/test_imgproc.cpp b/modules/gpu/test/test_imgproc.cpp index 388badf45..4d67de59d 100644 --- a/modules/gpu/test/test_imgproc.cpp +++ b/modules/gpu/test/test_imgproc.cpp @@ -1124,4 +1124,65 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CornerMinEigen, testing::Combine( testing::Values(BlockSize(3), BlockSize(5), BlockSize(7)), testing::Values(ApertureSize(0), ApertureSize(3), ApertureSize(5), ApertureSize(7)))); +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// HoughLines + +PARAM_TEST_CASE(HoughLines, cv::gpu::DeviceInfo, std::string) +{ +}; + +void drawLines(cv::Mat& dst, const std::vector<cv::Vec2f>& lines) +{ + for (size_t i = 0; i < lines.size(); ++i) + { + float rho = lines[i][0], theta = lines[i][1]; + cv::Point pt1, pt2; + double a = std::cos(theta), b = std::sin(theta); + double x0 = a*rho, y0 = b*rho; + pt1.x = cvRound(x0 + 1000*(-b)); + pt1.y = cvRound(y0 + 1000*(a)); + pt2.x = cvRound(x0 - 1000*(-b)); + pt2.y = cvRound(y0 - 1000*(a)); + cv::line(dst, pt1, pt2, cv::Scalar::all(255)); + } +} + +TEST_P(HoughLines, Accuracy) +{ + const cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + const std::string fileName = GET_PARAM(1); + + const float rho = 1.0f; + const float theta = CV_PI / 180.0f; + const int threshold = 50; + + cv::Mat img = readImage(fileName, cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(img.empty()); + + cv::Mat edges; + cv::Canny(img, edges, 50, 200); + + cv::gpu::GpuMat d_lines; + cv::gpu::HoughLines(loadMat(edges), d_lines, rho, theta, threshold); + std::vector<cv::Vec2f> lines; + cv::gpu::HoughLinesDownload(d_lines, lines); + cv::Mat dst(img.size(), CV_8UC1, cv::Scalar::all(0)); + drawLines(dst, lines); + + std::vector<cv::Vec2f> lines_gold; + cv::HoughLines(edges, lines_gold, rho, theta, threshold); + cv::Mat dst_gold(img.size(), CV_8UC1, cv::Scalar::all(0)); + drawLines(dst_gold, lines_gold); + + ASSERT_MAT_NEAR(dst_gold, dst, 0.0); +} + +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, HoughLines, testing::Combine( + ALL_DEVICES, + testing::Values(std::string("../cv/shared/pic1.png"), + std::string("../cv/shared/pic3.png"), + std::string("../cv/shared/pic5.png"), + std::string("../cv/shared/pic6.png")))); + } // namespace diff --git a/modules/video/src/bgfg_gmg.cpp b/modules/video/src/bgfg_gmg.cpp index 448f386bf..f2e52b593 100644 --- a/modules/video/src/bgfg_gmg.cpp +++ b/modules/video/src/bgfg_gmg.cpp @@ -197,7 +197,7 @@ namespace void operator() (const cv::Range& range) const; private: - const
cv::Mat frame_; + cv::Mat frame_; mutable cv::Mat_<uchar> fgmask_;