diff --git a/modules/gpuimgproc/src/cuda/build_point_list.cu b/modules/gpuimgproc/src/cuda/build_point_list.cu
new file mode 100644
index 000000000..c5f2b23f6
--- /dev/null
+++ b/modules/gpuimgproc/src/cuda/build_point_list.cu
@@ -0,0 +1,138 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/emulation.hpp"
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace hough
+    {
+        __device__ int g_counter;
+
+        template <int PIXELS_PER_THREAD>
+        __global__ void buildPointList(const PtrStepSzb src, unsigned int* list)
+        {
+            __shared__ unsigned int s_queues[4][32 * PIXELS_PER_THREAD];
+            __shared__ int s_qsize[4];
+            __shared__ int s_globStart[4];
+
+            const int x = blockIdx.x * blockDim.x * PIXELS_PER_THREAD + threadIdx.x;
+            const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+            if (threadIdx.x == 0)
+                s_qsize[threadIdx.y] = 0;
+            __syncthreads();
+
+            if (y < src.rows)
+            {
+                // fill the queue
+                const uchar* srcRow = src.ptr(y);
+                for (int i = 0, xx = x; i < PIXELS_PER_THREAD && xx < src.cols; ++i, xx += blockDim.x)
+                {
+                    if (srcRow[xx])
+                    {
+                        const unsigned int val = (y << 16) | xx;
+                        const int qidx = Emulation::smem::atomicAdd(&s_qsize[threadIdx.y], 1);
+                        s_queues[threadIdx.y][qidx] = val;
+                    }
+                }
+            }
+
+            __syncthreads();
+
+            // let one thread reserve the space required in the global list
+            if (threadIdx.x == 0 && threadIdx.y == 0)
+            {
+                // find how many items are stored in each list
+                int totalSize = 0;
+                for (int i = 0; i < blockDim.y; ++i)
+                {
+                    s_globStart[i] = totalSize;
+                    totalSize += s_qsize[i];
+                }
+
+                // calculate the offset in the global list
+                const int globalOffset = atomicAdd(&g_counter, totalSize);
+                for (int i = 0; i < blockDim.y; ++i)
+                    s_globStart[i] += globalOffset;
+            }
+
+            __syncthreads();
+
+            // copy local queues to global queue
+            const int qsize = s_qsize[threadIdx.y];
+            int gidx = s_globStart[threadIdx.y] + threadIdx.x;
+            for(int i = threadIdx.x; i < qsize; i += blockDim.x, gidx += blockDim.x)
+                list[gidx] = s_queues[threadIdx.y][i];
+        }
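+
+        // The kernel above compacts hits through per-row shared-memory queues and
+        // publishes each block's queues with a single atomicAdd on g_counter, so the
+        // output list is filled with one contiguous reservation per block. A host-side
+        // usage sketch, assuming src is a device binary image and list a preallocated
+        // device buffer with room for src.size().area() entries:
+        //
+        //     unsigned int* list;                                         // device memory
+        //     cudaMalloc(&list, src.cols * src.rows * sizeof(unsigned int));
+        //     const int count = hough::buildPointList_gpu(src, list);
+        //     // each of the count entries packs a point as (y << 16) | x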
+
+        int buildPointList_gpu(PtrStepSzb src, unsigned int* list)
+        {
+            const int PIXELS_PER_THREAD = 16;
+
+            void* counterPtr;
+            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+
+            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
+
+            const dim3 block(32, 4);
+            const dim3 grid(divUp(src.cols, block.x * PIXELS_PER_THREAD), divUp(src.rows, block.y));
+
+            cudaSafeCall( cudaFuncSetCacheConfig(buildPointList<PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
+
+            buildPointList<PIXELS_PER_THREAD><<<grid, block>>>(src, list);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+
+            int totalCount;
+            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+
+            return totalCount;
+        }
+    }
+}}}
+
+#endif /* CUDA_DISABLER */
diff --git a/modules/gpuimgproc/src/cuda/generalized_hough.cu b/modules/gpuimgproc/src/cuda/generalized_hough.cu
new file mode 100644
index 000000000..9ae5a595b
--- /dev/null
+++ b/modules/gpuimgproc/src/cuda/generalized_hough.cu
@@ -0,0 +1,1085 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include <thrust/device_ptr.h>
+#include <thrust/transform.h>
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/emulation.hpp"
+#include "opencv2/core/cuda/vec_math.hpp"
+
+#include "opencv2/opencv_modules.hpp"
+
+#ifdef HAVE_OPENCV_GPUARITHM
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace ght
+    {
+        __device__ int g_counter;
+
+        template <typename T, int PIXELS_PER_THREAD>
+        __global__ void buildEdgePointList(const PtrStepSzb edges, const PtrStep<T> dx, const PtrStep<T> dy,
+                                           unsigned int* coordList, float* thetaList)
+        {
+            __shared__ unsigned int s_coordLists[4][32 * PIXELS_PER_THREAD];
+            __shared__ float s_thetaLists[4][32 * PIXELS_PER_THREAD];
+            __shared__ int s_sizes[4];
+            __shared__ int s_globStart[4];
+
+            const int x = blockIdx.x * blockDim.x * PIXELS_PER_THREAD + threadIdx.x;
+            const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+            if (threadIdx.x == 0)
+                s_sizes[threadIdx.y] = 0;
+            __syncthreads();
+
+            if (y < edges.rows)
+            {
+                // fill the queue
+                const uchar* edgesRow = edges.ptr(y);
+                const T* dxRow = dx.ptr(y);
+                const T* dyRow = dy.ptr(y);
+
+                for (int i = 0, xx = x; i < PIXELS_PER_THREAD && xx < edges.cols; ++i, xx += blockDim.x)
+                {
+                    const T dxVal = dxRow[xx];
+                    const T dyVal = dyRow[xx];
+
+                    if (edgesRow[xx] && (dxVal != 0 || dyVal != 0))
+                    {
+                        const unsigned int coord = (y << 16) | xx;
+
+                        float theta = ::atan2f(dyVal, dxVal);
+                        if (theta < 0)
+                            theta += 2.0f * CV_PI_F;
+
+                        const int qidx = Emulation::smem::atomicAdd(&s_sizes[threadIdx.y], 1);
+
+                        s_coordLists[threadIdx.y][qidx] = coord;
+                        s_thetaLists[threadIdx.y][qidx] = theta;
+                    }
+                }
+            }
+
+            __syncthreads();
+
+            // let one thread reserve the space required in the global list
+            if (threadIdx.x == 0 && threadIdx.y == 0)
+            {
+                // find how many items are stored in each list
+                int totalSize = 0;
+                for (int i = 0; i < blockDim.y; ++i)
+                {
+                    s_globStart[i] = totalSize;
+                    totalSize += s_sizes[i];
+                }
+
+                // calculate the offset in the global list
+                const int globalOffset = atomicAdd(&g_counter, totalSize);
+                for (int i = 0; i < blockDim.y; ++i)
+                    s_globStart[i] += globalOffset;
+            }
+
+            __syncthreads();
+
+            // copy local queues to global queue
+            const int qsize = s_sizes[threadIdx.y];
+            int gidx = s_globStart[threadIdx.y] + threadIdx.x;
+            for(int i = threadIdx.x; i < qsize; i += blockDim.x, gidx += blockDim.x)
+            {
+                coordList[gidx] = s_coordLists[threadIdx.y][i];
+                thetaList[gidx] = s_thetaLists[threadIdx.y][i];
+            }
+        }
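+
+        // Same compaction scheme as buildPointList, but each accepted pixel must be
+        // an edge with a non-zero gradient; its coordinate is packed as (y << 16) | x
+        // and its gradient direction atan2f(dy, dx), normalized to [0, 2*pi), is
+        // stored at the same index of thetaList.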
+
+        template <typename T>
+        int buildEdgePointList_gpu(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList)
+        {
+            const int PIXELS_PER_THREAD = 8;
+
+            void* counterPtr;
+            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+
+            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
+
+            const dim3 block(32, 4);
+            const dim3 grid(divUp(edges.cols, block.x * PIXELS_PER_THREAD), divUp(edges.rows, block.y));
+
+            cudaSafeCall( cudaFuncSetCacheConfig(buildEdgePointList<T, PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
+
+            buildEdgePointList<T, PIXELS_PER_THREAD><<<grid, block>>>(edges, (PtrStepSz<T>) dx, (PtrStepSz<T>) dy, coordList, thetaList);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+
+            int totalCount;
+            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+
+            return totalCount;
+        }
+
+        template int buildEdgePointList_gpu<short>(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList);
+        template int buildEdgePointList_gpu<int>(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList);
+        template int buildEdgePointList_gpu<float>(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList);
+
+        __global__ void buildRTable(const unsigned int* coordList, const float* thetaList, const int pointsCount,
+                                    PtrStep<short2> r_table, int* r_sizes, int maxSize,
+                                    const short2 templCenter, const float thetaScale)
+        {
+            const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+
+            if (tid >= pointsCount)
+                return;
+
+            const unsigned int coord = coordList[tid];
+            short2 p;
+            p.x = (coord & 0xFFFF);
+            p.y = (coord >> 16) & 0xFFFF;
+
+            const float theta = thetaList[tid];
+            const int n = __float2int_rn(theta * thetaScale);
+
+            const int ind = ::atomicAdd(r_sizes + n, 1);
+            if (ind < maxSize)
+                r_table(n, ind) = p - templCenter;
+        }
+
+        void buildRTable_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
+                             PtrStepSz<short2> r_table, int* r_sizes,
+                             short2 templCenter, int levels)
+        {
+            const dim3 block(256);
+            const dim3 grid(divUp(pointsCount, block.x));
+
+            const float thetaScale = levels / (2.0f * CV_PI_F);
+
+            buildRTable<<<grid, block>>>(coordList, thetaList, pointsCount, r_table, r_sizes, r_table.cols, templCenter, thetaScale);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+        }
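+
+        // The R-table produced above has one row per quantized gradient-angle bin n
+        // (thetaScale maps [0, 2*pi) onto [0, levels]); row n holds the displacements
+        // p - templCenter of the template edge points in that bin. r_sizes[n] counts
+        // every attempted insertion, so it can exceed r_table.cols; entries past
+        // maxSize are simply dropped.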
+
+        ////////////////////////////////////////////////////////////////////////
+        // Ballard_Pos
+
+        __global__ void Ballard_Pos_calcHist(const unsigned int* coordList, const float* thetaList, const int pointsCount,
+                                             const PtrStep<short2> r_table, const int* r_sizes,
+                                             PtrStepSzi hist,
+                                             const float idp, const float thetaScale)
+        {
+            const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+
+            if (tid >= pointsCount)
+                return;
+
+            const unsigned int coord = coordList[tid];
+            short2 p;
+            p.x = (coord & 0xFFFF);
+            p.y = (coord >> 16) & 0xFFFF;
+
+            const float theta = thetaList[tid];
+            const int n = __float2int_rn(theta * thetaScale);
+
+            const short2* r_row = r_table.ptr(n);
+            const int r_row_size = r_sizes[n];
+
+            for (int j = 0; j < r_row_size; ++j)
+            {
+                short2 c = p - r_row[j];
+
+                c.x = __float2int_rn(c.x * idp);
+                c.y = __float2int_rn(c.y * idp);
+
+                if (c.x >= 0 && c.x < hist.cols - 2 && c.y >= 0 && c.y < hist.rows - 2)
+                    ::atomicAdd(hist.ptr(c.y + 1) + c.x + 1, 1);
+            }
+        }
+
+        void Ballard_Pos_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
+                                      PtrStepSz<short2> r_table, const int* r_sizes,
+                                      PtrStepSzi hist,
+                                      float dp, int levels)
+        {
+            const dim3 block(256);
+            const dim3 grid(divUp(pointsCount, block.x));
+
+            const float idp = 1.0f / dp;
+            const float thetaScale = levels / (2.0f * CV_PI_F);
+
+            Ballard_Pos_calcHist<<<grid, block>>>(coordList, thetaList, pointsCount, r_table, r_sizes, hist, idp, thetaScale);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+        }
+
+        __global__ void Ballard_Pos_findPosInHist(const PtrStepSzi hist, float4* out, int3* votes,
+                                                  const int maxSize, const float dp, const int threshold)
+        {
+            const int x = blockIdx.x * blockDim.x + threadIdx.x;
+            const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+            if (x >= hist.cols - 2 || y >= hist.rows - 2)
+                return;
+
+            const int curVotes = hist(y + 1, x + 1);
+
+            if (curVotes > threshold &&
+                curVotes > hist(y + 1, x) &&
+                curVotes >= hist(y + 1, x + 2) &&
+                curVotes > hist(y, x + 1) &&
+                curVotes >= hist(y + 2, x + 1))
+            {
+                const int ind = ::atomicAdd(&g_counter, 1);
+
+                if (ind < maxSize)
+                {
+                    out[ind] = make_float4(x * dp, y * dp, 1.0f, 0.0f);
+                    votes[ind] = make_int3(curVotes, 0, 0);
+                }
+            }
+        }
+
+        int Ballard_Pos_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int maxSize, float dp, int threshold)
+        {
+            void* counterPtr;
+            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+
+            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
+
+            const dim3 block(32, 8);
+            const dim3 grid(divUp(hist.cols - 2, block.x), divUp(hist.rows - 2, block.y));
+
+            cudaSafeCall( cudaFuncSetCacheConfig(Ballard_Pos_findPosInHist, cudaFuncCachePreferL1) );
+
+            Ballard_Pos_findPosInHist<<<grid, block>>>(hist, out, votes, maxSize, dp, threshold);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+
+            int totalCount;
+            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+
+            totalCount = ::min(totalCount, maxSize);
+
+            return totalCount;
+        }
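+
+        // hist carries a one-cell border on each side: votes go to (c.y + 1, c.x + 1),
+        // so the peak test above can read all four neighbours without bounds checks.
+        // Mixing strict '>' (left/top) with '>=' (right/bottom) keeps exactly one of
+        // two equal adjacent maxima.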
+
+        ////////////////////////////////////////////////////////////////////////
+        // Ballard_PosScale
+
+        __global__ void Ballard_PosScale_calcHist(const unsigned int* coordList, const float* thetaList,
+                                                  PtrStep<short2> r_table, const int* r_sizes,
+                                                  PtrStepi hist, const int rows, const int cols,
+                                                  const float minScale, const float scaleStep, const int scaleRange,
+                                                  const float idp, const float thetaScale)
+        {
+            const unsigned int coord = coordList[blockIdx.x];
+            float2 p;
+            p.x = (coord & 0xFFFF);
+            p.y = (coord >> 16) & 0xFFFF;
+
+            const float theta = thetaList[blockIdx.x];
+            const int n = __float2int_rn(theta * thetaScale);
+
+            const short2* r_row = r_table.ptr(n);
+            const int r_row_size = r_sizes[n];
+
+            for (int j = 0; j < r_row_size; ++j)
+            {
+                const float2 d = saturate_cast<float2>(r_row[j]);
+
+                for (int s = threadIdx.x; s < scaleRange; s += blockDim.x)
+                {
+                    const float scale = minScale + s * scaleStep;
+
+                    float2 c = p - scale * d;
+
+                    c.x *= idp;
+                    c.y *= idp;
+
+                    if (c.x >= 0 && c.x < cols && c.y >= 0 && c.y < rows)
+                        ::atomicAdd(hist.ptr((s + 1) * (rows + 2) + __float2int_rn(c.y + 1)) + __float2int_rn(c.x + 1), 1);
+                }
+            }
+        }
+
+        void Ballard_PosScale_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
+                                           PtrStepSz<short2> r_table, const int* r_sizes,
+                                           PtrStepi hist, int rows, int cols,
+                                           float minScale, float scaleStep, int scaleRange,
+                                           float dp, int levels)
+        {
+            const dim3 block(256);
+            const dim3 grid(pointsCount);
+
+            const float idp = 1.0f / dp;
+            const float thetaScale = levels / (2.0f * CV_PI_F);
+
+            Ballard_PosScale_calcHist<<<grid, block>>>(coordList, thetaList,
+                                                       r_table, r_sizes,
+                                                       hist, rows, cols,
+                                                       minScale, scaleStep, scaleRange,
+                                                       idp, thetaScale);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+        }
+
+        __global__ void Ballard_PosScale_findPosInHist(const PtrStepi hist, const int rows, const int cols, const int scaleRange,
+                                                       float4* out, int3* votes, const int maxSize,
+                                                       const float minScale, const float scaleStep, const float dp, const int threshold)
+        {
+            const int x = blockIdx.x * blockDim.x + threadIdx.x;
+            const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+            if (x >= cols || y >= rows)
+                return;
+
+            for (int s = 0; s < scaleRange; ++s)
+            {
+                const float scale = minScale + s * scaleStep;
+
+                const int prevScaleIdx = (s) * (rows + 2);
+                const int curScaleIdx = (s + 1) * (rows + 2);
+                const int nextScaleIdx = (s + 2) * (rows + 2);
+
+                const int curVotes = hist(curScaleIdx + y + 1, x + 1);
+
+                if (curVotes > threshold &&
+                    curVotes > hist(curScaleIdx + y + 1, x) &&
+                    curVotes >= hist(curScaleIdx + y + 1, x + 2) &&
+                    curVotes > hist(curScaleIdx + y, x + 1) &&
+                    curVotes >= hist(curScaleIdx + y + 2, x + 1) &&
+                    curVotes > hist(prevScaleIdx + y + 1, x + 1) &&
+                    curVotes >= hist(nextScaleIdx + y + 1, x + 1))
+                {
+                    const int ind = ::atomicAdd(&g_counter, 1);
+
+                    if (ind < maxSize)
+                    {
+                        out[ind] = make_float4(x * dp, y * dp, scale, 0.0f);
+                        votes[ind] = make_int3(curVotes, curVotes, 0);
+                    }
+                }
+            }
+        }
+
+        int Ballard_PosScale_findPosInHist_gpu(PtrStepi hist, int rows, int cols, int scaleRange, float4* out, int3* votes, int maxSize,
+                                               float minScale, float scaleStep, float dp, int threshold)
+        {
+            void* counterPtr;
+            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+
+            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
+
+            const dim3 block(32, 8);
+            const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
+
+            cudaSafeCall( cudaFuncSetCacheConfig(Ballard_PosScale_findPosInHist, cudaFuncCachePreferL1) );
+
+            Ballard_PosScale_findPosInHist<<<grid, block>>>(hist, rows, cols, scaleRange, out, votes,
+                                                            maxSize, minScale, scaleStep, dp, threshold);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+
+            int totalCount;
+            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+
+            totalCount = ::min(totalCount, maxSize);
+
+            return totalCount;
+        }
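+
+        // For position + scale, hist is a stack of scaleRange + 2 bordered planes of
+        // (rows + 2) x (cols + 2) cells, addressed as hist((s + 1) * (rows + 2) + y + 1, x + 1);
+        // the empty first and last planes let the detector above compare a peak with
+        // its neighbours in scale as well as in position.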
+
+        ////////////////////////////////////////////////////////////////////////
+        // Ballard_PosRotation
+
+        __global__ void Ballard_PosRotation_calcHist(const unsigned int* coordList, const float* thetaList,
+                                                     PtrStep<short2> r_table, const int* r_sizes,
+                                                     PtrStepi hist, const int rows, const int cols,
+                                                     const float minAngle, const float angleStep, const int angleRange,
+                                                     const float idp, const float thetaScale)
+        {
+            const unsigned int coord = coordList[blockIdx.x];
+            float2 p;
+            p.x = (coord & 0xFFFF);
+            p.y = (coord >> 16) & 0xFFFF;
+
+            const float thetaVal = thetaList[blockIdx.x];
+
+            for (int a = threadIdx.x; a < angleRange; a += blockDim.x)
+            {
+                const float angle = (minAngle + a * angleStep) * (CV_PI_F / 180.0f);
+                float sinA, cosA;
+                sincosf(angle, &sinA, &cosA);
+
+                float theta = thetaVal - angle;
+                if (theta < 0)
+                    theta += 2.0f * CV_PI_F;
+
+                const int n = __float2int_rn(theta * thetaScale);
+
+                const short2* r_row = r_table.ptr(n);
+                const int r_row_size = r_sizes[n];
+
+                for (int j = 0; j < r_row_size; ++j)
+                {
+                    const float2 d = saturate_cast<float2>(r_row[j]);
+
+                    const float2 dr = make_float2(d.x * cosA - d.y * sinA, d.x * sinA + d.y * cosA);
+
+                    float2 c = make_float2(p.x - dr.x, p.y - dr.y);
+                    c.x *= idp;
+                    c.y *= idp;
+
+                    if (c.x >= 0 && c.x < cols && c.y >= 0 && c.y < rows)
+                        ::atomicAdd(hist.ptr((a + 1) * (rows + 2) + __float2int_rn(c.y + 1)) + __float2int_rn(c.x + 1), 1);
+                }
+            }
+        }
+
+        void Ballard_PosRotation_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
+                                              PtrStepSz<short2> r_table, const int* r_sizes,
+                                              PtrStepi hist, int rows, int cols,
+                                              float minAngle, float angleStep, int angleRange,
+                                              float dp, int levels)
+        {
+            const dim3 block(256);
+            const dim3 grid(pointsCount);
+
+            const float idp = 1.0f / dp;
+            const float thetaScale = levels / (2.0f * CV_PI_F);
+
+            Ballard_PosRotation_calcHist<<<grid, block>>>(coordList, thetaList,
+                                                          r_table, r_sizes,
+                                                          hist, rows, cols,
+                                                          minAngle, angleStep, angleRange,
+                                                          idp, thetaScale);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+        }
+
+        __global__ void Ballard_PosRotation_findPosInHist(const PtrStepi hist, const int rows, const int cols, const int angleRange,
+                                                          float4* out, int3* votes, const int maxSize,
+                                                          const float minAngle, const float angleStep, const float dp, const int threshold)
+        {
+            const int x = blockIdx.x * blockDim.x + threadIdx.x;
+            const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+            if (x >= cols || y >= rows)
+                return;
+
+            for (int a = 0; a < angleRange; ++a)
+            {
+                const float angle = minAngle + a * angleStep;
+
+                const int prevAngleIdx = (a) * (rows + 2);
+                const int curAngleIdx = (a + 1) * (rows + 2);
+                const int nextAngleIdx = (a + 2) * (rows + 2);
+
+                const int curVotes = hist(curAngleIdx + y + 1, x + 1);
+
+                if (curVotes > threshold &&
+                    curVotes > hist(curAngleIdx + y + 1, x) &&
+                    curVotes >= hist(curAngleIdx + y + 1, x + 2) &&
+                    curVotes > hist(curAngleIdx + y, x + 1) &&
+                    curVotes >= hist(curAngleIdx + y + 2, x + 1) &&
+                    curVotes > hist(prevAngleIdx + y + 1, x + 1) &&
+                    curVotes >= hist(nextAngleIdx + y + 1, x + 1))
+                {
+                    const int ind = ::atomicAdd(&g_counter, 1);
+
+                    if (ind < maxSize)
+                    {
+                        out[ind] = make_float4(x * dp, y * dp, 1.0f, angle);
+                        votes[ind] = make_int3(curVotes, 0, curVotes);
+                    }
+                }
+            }
+        }
+
+        int Ballard_PosRotation_findPosInHist_gpu(PtrStepi hist, int rows, int cols, int angleRange, float4* out, int3* votes, int maxSize,
+                                                  float minAngle, float angleStep, float dp, int threshold)
+        {
+            void* counterPtr;
+            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+
+            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
+
+            const dim3 block(32, 8);
+            const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
+
+            cudaSafeCall( cudaFuncSetCacheConfig(Ballard_PosRotation_findPosInHist, cudaFuncCachePreferL1) );
+
+            Ballard_PosRotation_findPosInHist<<<grid, block>>>(hist, rows, cols, angleRange, out, votes,
+                                                               maxSize, minAngle, angleStep, dp, threshold);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+
+            int totalCount;
+            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+
+            totalCount = ::min(totalCount, maxSize);
+
+            return totalCount;
+        }
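+
+        // All variants share the int3 vote layout (position, scale, angle): Ballard_Pos
+        // reports (votes, 0, 0), Ballard_PosScale (votes, votes, 0) and
+        // Ballard_PosRotation (votes, 0, votes); Guil_Full below fills all three fields.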
+
+        ////////////////////////////////////////////////////////////////////////
+        // Guil_Full
+
+        struct FeatureTable
+        {
+            uchar* p1_pos_data;
+            size_t p1_pos_step;
+
+            uchar* p1_theta_data;
+            size_t p1_theta_step;
+
+            uchar* p2_pos_data;
+            size_t p2_pos_step;
+
+            uchar* d12_data;
+            size_t d12_step;
+
+            uchar* r1_data;
+            size_t r1_step;
+
+            uchar* r2_data;
+            size_t r2_step;
+        };
+
+        __constant__ FeatureTable c_templFeatures;
+        __constant__ FeatureTable c_imageFeatures;
+
+        void Guil_Full_setTemplFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2)
+        {
+            FeatureTable tbl;
+
+            tbl.p1_pos_data = p1_pos.data;
+            tbl.p1_pos_step = p1_pos.step;
+
+            tbl.p1_theta_data = p1_theta.data;
+            tbl.p1_theta_step = p1_theta.step;
+
+            tbl.p2_pos_data = p2_pos.data;
+            tbl.p2_pos_step = p2_pos.step;
+
+            tbl.d12_data = d12.data;
+            tbl.d12_step = d12.step;
+
+            tbl.r1_data = r1.data;
+            tbl.r1_step = r1.step;
+
+            tbl.r2_data = r2.data;
+            tbl.r2_step = r2.step;
+
+            cudaSafeCall( cudaMemcpyToSymbol(c_templFeatures, &tbl, sizeof(FeatureTable)) );
+        }
+        void Guil_Full_setImageFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2)
+        {
+            FeatureTable tbl;
+
+            tbl.p1_pos_data = p1_pos.data;
+            tbl.p1_pos_step = p1_pos.step;
+
+            tbl.p1_theta_data = p1_theta.data;
+            tbl.p1_theta_step = p1_theta.step;
+
+            tbl.p2_pos_data = p2_pos.data;
+            tbl.p2_pos_step = p2_pos.step;
+
+            tbl.d12_data = d12.data;
+            tbl.d12_step = d12.step;
+
+            tbl.r1_data = r1.data;
+            tbl.r1_step = r1.step;
+
+            tbl.r2_data = r2.data;
+            tbl.r2_step = r2.step;
+
+            cudaSafeCall( cudaMemcpyToSymbol(c_imageFeatures, &tbl, sizeof(FeatureTable)) );
+        }
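+
+        // Both feature tables live in constant memory as untyped pointer/step pairs;
+        // the accessor structs below turn them back into typed row pointers, so the
+        // same kernels can address either table through a template parameter.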
+
+        struct TemplFeatureTable
+        {
+            static __device__ float2* p1_pos(int n)
+            {
+                return (float2*)(c_templFeatures.p1_pos_data + n * c_templFeatures.p1_pos_step);
+            }
+            static __device__ float* p1_theta(int n)
+            {
+                return (float*)(c_templFeatures.p1_theta_data + n * c_templFeatures.p1_theta_step);
+            }
+            static __device__ float2* p2_pos(int n)
+            {
+                return (float2*)(c_templFeatures.p2_pos_data + n * c_templFeatures.p2_pos_step);
+            }
+
+            static __device__ float* d12(int n)
+            {
+                return (float*)(c_templFeatures.d12_data + n * c_templFeatures.d12_step);
+            }
+
+            static __device__ float2* r1(int n)
+            {
+                return (float2*)(c_templFeatures.r1_data + n * c_templFeatures.r1_step);
+            }
+            static __device__ float2* r2(int n)
+            {
+                return (float2*)(c_templFeatures.r2_data + n * c_templFeatures.r2_step);
+            }
+        };
+        struct ImageFeatureTable
+        {
+            static __device__ float2* p1_pos(int n)
+            {
+                return (float2*)(c_imageFeatures.p1_pos_data + n * c_imageFeatures.p1_pos_step);
+            }
+            static __device__ float* p1_theta(int n)
+            {
+                return (float*)(c_imageFeatures.p1_theta_data + n * c_imageFeatures.p1_theta_step);
+            }
+            static __device__ float2* p2_pos(int n)
+            {
+                return (float2*)(c_imageFeatures.p2_pos_data + n * c_imageFeatures.p2_pos_step);
+            }
+
+            static __device__ float* d12(int n)
+            {
+                return (float*)(c_imageFeatures.d12_data + n * c_imageFeatures.d12_step);
+            }
+
+            static __device__ float2* r1(int n)
+            {
+                return (float2*)(c_imageFeatures.r1_data + n * c_imageFeatures.r1_step);
+            }
+            static __device__ float2* r2(int n)
+            {
+                return (float2*)(c_imageFeatures.r2_data + n * c_imageFeatures.r2_step);
+            }
+        };
+
+        __device__ float clampAngle(float a)
+        {
+            float res = a;
+
+            while (res > 2.0f * CV_PI_F)
+                res -= 2.0f * CV_PI_F;
+            while (res < 0.0f)
+                res += 2.0f * CV_PI_F;
+
+            return res;
+        }
+
+        __device__ bool angleEq(float a, float b, float eps)
+        {
+            return (::fabs(clampAngle(a - b)) <= eps);
+        }
+
+        template <class FT, bool isTempl>
+        __global__ void Guil_Full_buildFeatureList(const unsigned int* coordList, const float* thetaList, const int pointsCount,
+                                                   int* sizes, const int maxSize,
+                                                   const float xi, const float angleEpsilon, const float alphaScale,
+                                                   const float2 center, const float maxDist)
+        {
+            const float p1_theta = thetaList[blockIdx.x];
+            const unsigned int coord1 = coordList[blockIdx.x];
+            float2 p1_pos;
+            p1_pos.x = (coord1 & 0xFFFF);
+            p1_pos.y = (coord1 >> 16) & 0xFFFF;
+
+            for (int i = threadIdx.x; i < pointsCount; i += blockDim.x)
+            {
+                const float p2_theta = thetaList[i];
+                const unsigned int coord2 = coordList[i];
+                float2 p2_pos;
+                p2_pos.x = (coord2 & 0xFFFF);
+                p2_pos.y = (coord2 >> 16) & 0xFFFF;
+
+                if (angleEq(p1_theta - p2_theta, xi, angleEpsilon))
+                {
+                    const float2 d = p1_pos - p2_pos;
+
+                    float alpha12 = clampAngle(::atan2(d.y, d.x) - p1_theta);
+                    float d12 = ::sqrtf(d.x * d.x + d.y * d.y);
+
+                    if (d12 > maxDist)
+                        continue;
+
+                    float2 r1 = p1_pos - center;
+                    float2 r2 = p2_pos - center;
+
+                    const int n = __float2int_rn(alpha12 * alphaScale);
+
+                    const int ind = ::atomicAdd(sizes + n, 1);
+
+                    if (ind < maxSize)
+                    {
+                        if (!isTempl)
+                        {
+                            FT::p1_pos(n)[ind] = p1_pos;
+                            FT::p2_pos(n)[ind] = p2_pos;
+                        }
+
+                        FT::p1_theta(n)[ind] = p1_theta;
+
+                        FT::d12(n)[ind] = d12;
+
+                        if (isTempl)
+                        {
+                            FT::r1(n)[ind] = r1;
+                            FT::r2(n)[ind] = r2;
+                        }
+                    }
+                }
+            }
+        }
+
+        template <class FT, bool isTempl>
+        void Guil_Full_buildFeatureList_caller(const unsigned int* coordList, const float* thetaList, int pointsCount,
+                                               int* sizes, int maxSize,
+                                               float xi, float angleEpsilon, int levels,
+                                               float2 center, float maxDist)
+        {
+            const dim3 block(256);
+            const dim3 grid(pointsCount);
+
+            const float alphaScale = levels / (2.0f * CV_PI_F);
+
+            Guil_Full_buildFeatureList<FT, isTempl><<<grid, block>>>(coordList, thetaList, pointsCount,
+                                                                     sizes, maxSize,
+                                                                     xi * (CV_PI_F / 180.0f), angleEpsilon * (CV_PI_F / 180.0f), alphaScale,
+                                                                     center, maxDist);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+
+            thrust::device_ptr<int> sizesPtr(sizes);
+            thrust::transform(sizesPtr, sizesPtr + levels + 1, sizesPtr, cudev::bind2nd(cudev::minimum<int>(), maxSize));
+        }
+
+        void Guil_Full_buildTemplFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
+                                                 int* sizes, int maxSize,
+                                                 float xi, float angleEpsilon, int levels,
+                                                 float2 center, float maxDist)
+        {
+            Guil_Full_buildFeatureList_caller<TemplFeatureTable, true>(coordList, thetaList, pointsCount,
+                                                                       sizes, maxSize,
+                                                                       xi, angleEpsilon, levels,
+                                                                       center, maxDist);
+        }
+        void Guil_Full_buildImageFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
+                                                 int* sizes, int maxSize,
+                                                 float xi, float angleEpsilon, int levels,
+                                                 float2 center, float maxDist)
+        {
+            Guil_Full_buildFeatureList_caller<ImageFeatureTable, false>(coordList, thetaList, pointsCount,
+                                                                        sizes, maxSize,
+                                                                        xi, angleEpsilon, levels,
+                                                                        center, maxDist);
+        }
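+
+        // A feature pairs two edge points p1, p2 whose gradient directions differ by
+        // roughly xi; it is binned by alpha12, the angle between p1->p2 and the
+        // gradient at p1. The kernel counts every insertion attempt, so the thrust
+        // transform in the caller clamps each bin size to maxSize afterwards.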
+
+        __global__ void Guil_Full_calcOHist(const int* templSizes, const int* imageSizes, int* OHist,
+                                            const float minAngle, const float maxAngle, const float iAngleStep, const int angleRange)
+        {
+            extern __shared__ int s_OHist[];
+            for (int i = threadIdx.x; i <= angleRange; i += blockDim.x)
+                s_OHist[i] = 0;
+            __syncthreads();
+
+            const int tIdx = blockIdx.x;
+            const int level = blockIdx.y;
+
+            const int tSize = templSizes[level];
+
+            if (tIdx < tSize)
+            {
+                const int imSize = imageSizes[level];
+
+                const float t_p1_theta = TemplFeatureTable::p1_theta(level)[tIdx];
+
+                for (int i = threadIdx.x; i < imSize; i += blockDim.x)
+                {
+                    const float im_p1_theta = ImageFeatureTable::p1_theta(level)[i];
+
+                    const float angle = clampAngle(im_p1_theta - t_p1_theta);
+
+                    if (angle >= minAngle && angle <= maxAngle)
+                    {
+                        const int n = __float2int_rn((angle - minAngle) * iAngleStep);
+                        Emulation::smem::atomicAdd(&s_OHist[n], 1);
+                    }
+                }
+            }
+            __syncthreads();
+
+            for (int i = threadIdx.x; i <= angleRange; i += blockDim.x)
+                ::atomicAdd(OHist + i, s_OHist[i]);
+        }
+
+        void Guil_Full_calcOHist_gpu(const int* templSizes, const int* imageSizes, int* OHist,
+                                     float minAngle, float maxAngle, float angleStep, int angleRange,
+                                     int levels, int tMaxSize)
+        {
+            const dim3 block(256);
+            const dim3 grid(tMaxSize, levels + 1);
+
+            minAngle *= (CV_PI_F / 180.0f);
+            maxAngle *= (CV_PI_F / 180.0f);
+            angleStep *= (CV_PI_F / 180.0f);
+
+            const size_t smemSize = (angleRange + 1) * sizeof(float);
+
+            Guil_Full_calcOHist<<<grid, block, smemSize>>>(templSizes, imageSizes, OHist,
+                                                           minAngle, maxAngle, 1.0f / angleStep, angleRange);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+        }
+
+        __global__ void Guil_Full_calcSHist(const int* templSizes, const int* imageSizes, int* SHist,
+                                            const float angle, const float angleEpsilon,
+                                            const float minScale, const float maxScale, const float iScaleStep, const int scaleRange)
+        {
+            extern __shared__ int s_SHist[];
+            for (int i = threadIdx.x; i <= scaleRange; i += blockDim.x)
+                s_SHist[i] = 0;
+            __syncthreads();
+
+            const int tIdx = blockIdx.x;
+            const int level = blockIdx.y;
+
+            const int tSize = templSizes[level];
+
+            if (tIdx < tSize)
+            {
+                const int imSize = imageSizes[level];
+
+                const float t_p1_theta = TemplFeatureTable::p1_theta(level)[tIdx] + angle;
+                const float t_d12 = TemplFeatureTable::d12(level)[tIdx] + angle;
+
+                for (int i = threadIdx.x; i < imSize; i += blockDim.x)
+                {
+                    const float im_p1_theta = ImageFeatureTable::p1_theta(level)[i];
+                    const float im_d12 = ImageFeatureTable::d12(level)[i];
+
+                    if (angleEq(im_p1_theta, t_p1_theta, angleEpsilon))
+                    {
+                        const float scale = im_d12 / t_d12;
+
+                        if (scale >= minScale && scale <= maxScale)
+                        {
+                            const int s = __float2int_rn((scale - minScale) * iScaleStep);
+                            Emulation::smem::atomicAdd(&s_SHist[s], 1);
+                        }
+                    }
+                }
+            }
+            __syncthreads();
+
+            for (int i = threadIdx.x; i <= scaleRange; i += blockDim.x)
+                ::atomicAdd(SHist + i, s_SHist[i]);
+        }
+
+        void Guil_Full_calcSHist_gpu(const int* templSizes, const int* imageSizes, int* SHist,
+                                     float angle, float angleEpsilon,
+                                     float minScale, float maxScale, float iScaleStep, int scaleRange,
+                                     int levels, int tMaxSize)
+        {
+            const dim3 block(256);
+            const dim3 grid(tMaxSize, levels + 1);
+
+            angle *= (CV_PI_F / 180.0f);
+            angleEpsilon *= (CV_PI_F / 180.0f);
+
+            const size_t smemSize = (scaleRange + 1) * sizeof(float);
+
+            Guil_Full_calcSHist<<<grid, block, smemSize>>>(templSizes, imageSizes, SHist,
+                                                           angle, angleEpsilon,
+                                                           minScale, maxScale, iScaleStep, scaleRange);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+        }
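+
+        // Guil's full variant decomposes the 4D search: the orientation histogram
+        // above estimates rotation from p1_theta differences, the scale histogram
+        // estimates scale from d12 ratios at a fixed rotation, and the position
+        // histogram below casts 2D votes with r1/r2 rotated and scaled accordingly.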
+
+        __global__ void Guil_Full_calcPHist(const int* templSizes, const int* imageSizes, PtrStepSzi PHist,
+                                            const float angle, const float sinVal, const float cosVal, const float angleEpsilon, const float scale,
+                                            const float idp)
+        {
+            const int tIdx = blockIdx.x;
+            const int level = blockIdx.y;
+
+            const int tSize = templSizes[level];
+
+            if (tIdx < tSize)
+            {
+                const int imSize = imageSizes[level];
+
+                const float t_p1_theta = TemplFeatureTable::p1_theta(level)[tIdx] + angle;
+
+                float2 r1 = TemplFeatureTable::r1(level)[tIdx];
+                float2 r2 = TemplFeatureTable::r2(level)[tIdx];
+
+                r1 = r1 * scale;
+                r2 = r2 * scale;
+
+                r1 = make_float2(cosVal * r1.x - sinVal * r1.y, sinVal * r1.x + cosVal * r1.y);
+                r2 = make_float2(cosVal * r2.x - sinVal * r2.y, sinVal * r2.x + cosVal * r2.y);
+
+                for (int i = threadIdx.x; i < imSize; i += blockDim.x)
+                {
+                    const float im_p1_theta = ImageFeatureTable::p1_theta(level)[i];
+
+                    const float2 im_p1_pos = ImageFeatureTable::p1_pos(level)[i];
+                    const float2 im_p2_pos = ImageFeatureTable::p2_pos(level)[i];
+
+                    if (angleEq(im_p1_theta, t_p1_theta, angleEpsilon))
+                    {
+                        float2 c1, c2;
+
+                        c1 = im_p1_pos - r1;
+                        c1 = c1 * idp;
+
+                        c2 = im_p2_pos - r2;
+                        c2 = c2 * idp;
+
+                        if (::fabs(c1.x - c2.x) > 1 || ::fabs(c1.y - c2.y) > 1)
+                            continue;
+
+                        if (c1.y >= 0 && c1.y < PHist.rows - 2 && c1.x >= 0 && c1.x < PHist.cols - 2)
+                            ::atomicAdd(PHist.ptr(__float2int_rn(c1.y) + 1) + __float2int_rn(c1.x) + 1, 1);
+                    }
+                }
+            }
+        }
+
+        void Guil_Full_calcPHist_gpu(const int* templSizes, const int* imageSizes, PtrStepSzi PHist,
+                                     float angle, float angleEpsilon, float scale,
+                                     float dp,
+                                     int levels, int tMaxSize)
+        {
+            const dim3 block(256);
+            const dim3 grid(tMaxSize, levels + 1);
+
+            angle *= (CV_PI_F / 180.0f);
+            angleEpsilon *= (CV_PI_F / 180.0f);
+
+            const float sinVal = ::sinf(angle);
+            const float cosVal = ::cosf(angle);
+
+            cudaSafeCall( cudaFuncSetCacheConfig(Guil_Full_calcPHist, cudaFuncCachePreferL1) );
+
+            Guil_Full_calcPHist<<<grid, block>>>(templSizes, imageSizes, PHist,
+                                                 angle, sinVal, cosVal, angleEpsilon, scale,
+                                                 1.0f / dp);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+        }
+
+        __global__ void Guil_Full_findPosInHist(const PtrStepSzi hist, float4* out, int3* votes, const int maxSize,
+                                                const float angle, const int angleVotes, const float scale, const int scaleVotes,
+                                                const float dp, const int threshold)
+        {
+            const int x = blockIdx.x * blockDim.x + threadIdx.x;
+            const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+            if (x >= hist.cols - 2 || y >= hist.rows - 2)
+                return;
+
+            const int curVotes = hist(y + 1, x + 1);
+
+            if (curVotes > threshold &&
+                curVotes > hist(y + 1, x) &&
+                curVotes >= hist(y + 1, x + 2) &&
+                curVotes > hist(y, x + 1) &&
+                curVotes >= hist(y + 2, x + 1))
+            {
+                const int ind = ::atomicAdd(&g_counter, 1);
+
+                if (ind < maxSize)
+                {
+                    out[ind] = make_float4(x * dp, y * dp, scale, angle);
+                    votes[ind] = make_int3(curVotes, scaleVotes, angleVotes);
+                }
+            }
+        }
+
+        int Guil_Full_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int curSize, int maxSize,
+                                        float angle, int angleVotes, float scale, int scaleVotes,
+                                        float dp, int threshold)
+        {
+            void* counterPtr;
+            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+
+            cudaSafeCall( cudaMemcpy(counterPtr, &curSize, sizeof(int), cudaMemcpyHostToDevice) );
+
+            const dim3 block(32, 8);
+            const dim3 grid(divUp(hist.cols - 2, block.x), divUp(hist.rows - 2, block.y));
+
+            cudaSafeCall( cudaFuncSetCacheConfig(Guil_Full_findPosInHist, cudaFuncCachePreferL1) );
+
+            Guil_Full_findPosInHist<<<grid, block>>>(hist, out, votes, maxSize,
+                                                     angle, angleVotes, scale, scaleVotes,
+                                                     dp, threshold);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+
+            int totalCount;
+            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+
+            totalCount = ::min(totalCount, maxSize);
+
+            return totalCount;
+        }
+    }
+}}}
+
+#endif // HAVE_OPENCV_GPUARITHM
+
+#endif /* CUDA_DISABLER */
diff --git a/modules/gpuimgproc/src/cuda/hough.cu b/modules/gpuimgproc/src/cuda/hough.cu
deleted file mode 100644
index 696ed3845..000000000
--- a/modules/gpuimgproc/src/cuda/hough.cu
+++ /dev/null
@@ -1,1710 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#if !defined CUDA_DISABLER
-
-#include <thrust/device_ptr.h>
-#include <thrust/sort.h>
-
-#include "opencv2/core/cuda/common.hpp"
-#include "opencv2/core/cuda/emulation.hpp"
-#include "opencv2/core/cuda/vec_math.hpp"
-#include "opencv2/core/cuda/functional.hpp"
-#include "opencv2/core/cuda/limits.hpp"
-#include "opencv2/core/cuda/dynamic_smem.hpp"
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace hough
-    {
-        __device__ int g_counter;
-
-        ////////////////////////////////////////////////////////////////////////
-        // buildPointList
-
-        template <int PIXELS_PER_THREAD>
-        __global__ void buildPointList(const PtrStepSzb src, unsigned int* list)
-        {
-            __shared__ unsigned int s_queues[4][32 * PIXELS_PER_THREAD];
-            __shared__ int s_qsize[4];
-            __shared__ int s_globStart[4];
-
-            const int x = blockIdx.x * blockDim.x * PIXELS_PER_THREAD + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (threadIdx.x == 0)
-                s_qsize[threadIdx.y] = 0;
-            __syncthreads();
-
-            if (y < src.rows)
-            {
-                // fill the queue
-                const uchar* srcRow = src.ptr(y);
-                for (int i = 0, xx = x; i < PIXELS_PER_THREAD && xx < src.cols; ++i, xx += blockDim.x)
-                {
-                    if (srcRow[xx])
-                    {
-                        const unsigned int val = (y << 16) | xx;
-                        const int qidx = Emulation::smem::atomicAdd(&s_qsize[threadIdx.y], 1);
-                        s_queues[threadIdx.y][qidx] = val;
-                    }
-                }
-            }
-
-            __syncthreads();
-
-            // let one thread reserve the space required in the global list
-            if (threadIdx.x == 0 && threadIdx.y == 0)
-            {
-                // find how many items are stored in each list
-                int totalSize = 0;
-                for (int i = 0; i < blockDim.y; ++i)
-                {
-                    s_globStart[i] = totalSize;
-                    totalSize += s_qsize[i];
-                }
-
-                // calculate the offset in the global list
-                const int globalOffset = atomicAdd(&g_counter, totalSize);
-                for (int i = 0; i < blockDim.y; ++i)
-                    s_globStart[i] += globalOffset;
-            }
-
-            __syncthreads();
-
-            // copy local queues to global queue
-            const int qsize = s_qsize[threadIdx.y];
-            int gidx = s_globStart[threadIdx.y] + threadIdx.x;
-            for(int i = threadIdx.x; i < qsize; i += blockDim.x, gidx += blockDim.x)
-                list[gidx] = s_queues[threadIdx.y][i];
-        }
-
-        int buildPointList_gpu(PtrStepSzb src, unsigned int* list)
-        {
-            const int PIXELS_PER_THREAD = 16;
-
-            void* counterPtr;
-            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-
-            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
-
-            const dim3 block(32, 4);
-            const dim3 grid(divUp(src.cols, block.x * PIXELS_PER_THREAD), divUp(src.rows, block.y));
-
-            cudaSafeCall( cudaFuncSetCacheConfig(buildPointList<PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
-
-            buildPointList<PIXELS_PER_THREAD><<<grid, block>>>(src, list);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-
-            int totalCount;
-            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
-
-            return totalCount;
-        }
-
-        ////////////////////////////////////////////////////////////////////////
-        // linesAccum
-
-        __global__ void linesAccumGlobal(const unsigned int* list, const int count, PtrStepi accum, const float irho, const float theta, const int numrho)
-        {
-            const int n = blockIdx.x;
-            const float ang = n * theta;
-
-            float sinVal;
-            float cosVal;
-            sincosf(ang, &sinVal, &cosVal);
-            sinVal *= irho;
-            cosVal *= irho;
-
-            const int shift = (numrho - 1) / 2;
-
-            int* accumRow = accum.ptr(n + 1);
-            for (int i = threadIdx.x; i < count; i += blockDim.x)
-            {
-                const unsigned int val = list[i];
-
-                const int x = (val & 0xFFFF);
-                const int y = (val >> 16) & 0xFFFF;
-
-                int r = __float2int_rn(x * cosVal + y * sinVal);
-                r += shift;
-
-                ::atomicAdd(accumRow + r + 1, 1);
-            }
-        }
-
-        __global__ void linesAccumShared(const unsigned int* list, const int count, PtrStepi accum, const float irho, const float theta, const int numrho)
-        {
-            int* smem = DynamicSharedMem<int>();
-
-            for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x)
-                smem[i] = 0;
-
-            __syncthreads();
-
-            const int n = blockIdx.x;
-            const float ang = n * theta;
-
-            float sinVal;
-            float cosVal;
-            sincosf(ang, &sinVal, &cosVal);
-            sinVal *= irho;
-            cosVal *= irho;
-
-            const int shift = (numrho - 1) / 2;
-
-            for (int i = threadIdx.x; i < count; i += blockDim.x)
-            {
-                const unsigned int val = list[i];
-
-                const int x = (val & 0xFFFF);
-                const int y = (val >> 16) & 0xFFFF;
-
-                int r = __float2int_rn(x * cosVal + y * sinVal);
-                r += shift;
-
-                Emulation::smem::atomicAdd(&smem[r + 1], 1);
-            }
-
-            __syncthreads();
-
-            int* accumRow = accum.ptr(n + 1);
-            for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x)
-                accumRow[i] = smem[i];
-        }
-
-        void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20)
-        {
-            const dim3 block(has20 ? 1024 : 512);
-            const dim3 grid(accum.rows - 2);
-
-            size_t smemSize = (accum.cols - 1) * sizeof(int);
-
-            if (smemSize < sharedMemPerBlock - 1000)
-                linesAccumShared<<<grid, block, smemSize>>>(list, count, accum, 1.0f / rho, theta, accum.cols - 2);
-            else
-                linesAccumGlobal<<<grid, block>>>(list, count, accum, 1.0f / rho, theta, accum.cols - 2);
-
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-        ////////////////////////////////////////////////////////////////////////
-        // linesGetResult
-
-        __global__ void linesGetResult(const PtrStepSzi accum, float2* out, int* votes, const int maxSize, const float rho, const float theta, const int threshold, const int numrho)
-        {
-            const int r = blockIdx.x * blockDim.x + threadIdx.x;
-            const int n = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (r >= accum.cols - 2 || n >= accum.rows - 2)
-                return;
-
-            const int curVotes = accum(n + 1, r + 1);
-
-            if (curVotes > threshold &&
-                curVotes > accum(n + 1, r) &&
-                curVotes >= accum(n + 1, r + 2) &&
-                curVotes > accum(n, r + 1) &&
-                curVotes >= accum(n + 2, r + 1))
-            {
-                const float radius = (r - (numrho - 1) * 0.5f) * rho;
-                const float angle = n * theta;
-
-                const int ind = ::atomicAdd(&g_counter, 1);
-                if (ind < maxSize)
-                {
-                    out[ind] = make_float2(radius, angle);
-                    votes[ind] = curVotes;
-                }
-            }
-        }
-
-        int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort)
-        {
-            void* counterPtr;
-            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-
-            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
-
-            const dim3 block(32, 8);
-            const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
-
-            cudaSafeCall( cudaFuncSetCacheConfig(linesGetResult, cudaFuncCachePreferL1) );
-
-            linesGetResult<<<grid, block>>>(accum, out, votes, maxSize, rho, theta, threshold, accum.cols - 2);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-
-            int totalCount;
-            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
-
-            totalCount = ::min(totalCount, maxSize);
-
-            if (doSort && totalCount > 0)
-            {
-                thrust::device_ptr<float2> outPtr(out);
-                thrust::device_ptr<int> votesPtr(votes);
-                thrust::sort_by_key(votesPtr, votesPtr + totalCount, outPtr, thrust::greater<int>());
-            }
-
-            return totalCount;
-        }
-
-        ////////////////////////////////////////////////////////////////////////
-        // houghLinesProbabilistic
-
-        texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_mask(false, cudaFilterModePoint, cudaAddressModeClamp);
-
-        __global__ void houghLinesProbabilistic(const PtrStepSzi accum,
-                                                int4* out, const int maxSize,
-                                                const float rho, const float theta,
-                                                const int lineGap, const int lineLength,
-                                                const int rows, const int cols)
-        {
-            const int r = blockIdx.x * blockDim.x + threadIdx.x;
-            const int n = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (r >= accum.cols - 2 || n >= accum.rows - 2)
-                return;
-
-            const int curVotes = accum(n + 1, r + 1);
-
-            if (curVotes >= lineLength &&
-                curVotes > accum(n, r) &&
-                curVotes > accum(n, r + 1) &&
-                curVotes > accum(n, r + 2) &&
-                curVotes > accum(n + 1, r) &&
-                curVotes > accum(n + 1, r + 2) &&
-                curVotes > accum(n + 2, r) &&
-                curVotes > accum(n + 2, r + 1) &&
-                curVotes > accum(n + 2, r + 2))
-            {
-                const float radius = (r - (accum.cols - 2 - 1) * 0.5f) * rho;
-                const float angle = n * theta;
-
-                float cosa;
-                float sina;
-                sincosf(angle, &sina, &cosa);
-
-                float2 p0 = make_float2(cosa * radius, sina * radius);
-                float2 dir = make_float2(-sina, cosa);
-
-                float2 pb[4] = {make_float2(-1, -1), make_float2(-1, -1), make_float2(-1, -1), make_float2(-1, -1)};
-                float a;
-
-                if (dir.x != 0)
-                {
-                    a = -p0.x / dir.x;
-                    pb[0].x = 0;
-                    pb[0].y = p0.y + a * dir.y;
-
-                    a = (cols - 1 - p0.x) / dir.x;
-                    pb[1].x = cols - 1;
-                    pb[1].y = p0.y + a * dir.y;
-                }
-                if (dir.y != 0)
-                {
-                    a = -p0.y / dir.y;
-                    pb[2].x = p0.x + a * dir.x;
-                    pb[2].y = 0;
-
-                    a = (rows - 1 - p0.y) / dir.y;
-                    pb[3].x = p0.x + a * dir.x;
-                    pb[3].y = rows - 1;
-                }
-
-                if (pb[0].x == 0 && (pb[0].y >= 0 && pb[0].y < rows))
-                {
-                    p0 = pb[0];
-                    if (dir.x < 0)
-                        dir = -dir;
-                }
-                else if (pb[1].x == cols - 1 && (pb[0].y >= 0 && pb[0].y < rows))
-                {
-                    p0 = pb[1];
-                    if (dir.x > 0)
-                        dir = -dir;
-                }
-                else if (pb[2].y == 0 && (pb[2].x >= 0 && pb[2].x < cols))
-                {
-                    p0 = pb[2];
-                    if (dir.y < 0)
-                        dir = -dir;
-                }
-                else if (pb[3].y == rows - 1 && (pb[3].x >= 0 && pb[3].x < cols))
-                {
-                    p0 = pb[3];
-                    if (dir.y > 0)
-                        dir = -dir;
-                }
-
-                float2 d;
-                if (::fabsf(dir.x) > ::fabsf(dir.y))
-                {
-                    d.x = dir.x > 0 ? 1 : -1;
-                    d.y = dir.y / ::fabsf(dir.x);
-                }
-                else
-                {
-                    d.x = dir.x / ::fabsf(dir.y);
-                    d.y = dir.y > 0 ? 1 : -1;
-                }
-
-                float2 line_end[2];
-                int gap;
-                bool inLine = false;
-
-                float2 p1 = p0;
-                if (p1.x < 0 || p1.x >= cols || p1.y < 0 || p1.y >= rows)
-                    return;
-
-                for (;;)
-                {
-                    if (tex2D(tex_mask, p1.x, p1.y))
-                    {
-                        gap = 0;
-
-                        if (!inLine)
-                        {
-                            line_end[0] = p1;
-                            line_end[1] = p1;
-                            inLine = true;
-                        }
-                        else
-                        {
-                            line_end[1] = p1;
-                        }
-                    }
-                    else if (inLine)
-                    {
-                        if (++gap > lineGap)
-                        {
-                            bool good_line = ::abs(line_end[1].x - line_end[0].x) >= lineLength ||
-                                             ::abs(line_end[1].y - line_end[0].y) >= lineLength;
-
-                            if (good_line)
-                            {
-                                const int ind = ::atomicAdd(&g_counter, 1);
-                                if (ind < maxSize)
-                                    out[ind] = make_int4(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y);
-                            }
-
-                            gap = 0;
-                            inLine = false;
-                        }
-                    }
-
-                    p1 = p1 + d;
-                    if (p1.x < 0 || p1.x >= cols || p1.y < 0 || p1.y >= rows)
-                    {
-                        if (inLine)
-                        {
-                            bool good_line = ::abs(line_end[1].x - line_end[0].x) >= lineLength ||
-                                             ::abs(line_end[1].y - line_end[0].y) >= lineLength;
-
-                            if (good_line)
-                            {
-                                const int ind = ::atomicAdd(&g_counter, 1);
-                                if (ind < maxSize)
-                                    out[ind] = make_int4(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y);
-                            }
-
-                        }
-                        break;
-                    }
-                }
-            }
-        }
-
-        int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength)
-        {
-            void* counterPtr;
-            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-
-            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
-
-            const dim3 block(32, 8);
-            const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
-
-            bindTexture(&tex_mask, mask);
-
-            houghLinesProbabilistic<<<grid, block>>>(accum,
-                                                     out, maxSize,
-                                                     rho, theta,
-                                                     lineGap, lineLength,
-                                                     mask.rows, mask.cols);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-
-            int totalCount;
-            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
-
-            totalCount = ::min(totalCount, maxSize);
-
-            return totalCount;
-        }
-
-        ////////////////////////////////////////////////////////////////////////
-        // circlesAccumCenters
-
-        __global__ void circlesAccumCenters(const unsigned int* list, const int count, const PtrStepi dx, const PtrStepi dy,
-                                            PtrStepi accum, const int width, const int height, const int minRadius, const int maxRadius, const float idp)
-        {
-            const int SHIFT = 10;
-            const int ONE = 1 << SHIFT;
-
-            const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-
-            if (tid >= count)
-                return;
-
-            const unsigned int val = list[tid];
-
-            const int x = (val & 0xFFFF);
-            const int y = (val >> 16) & 0xFFFF;
-
-            const int vx = dx(y, x);
-            const int vy = dy(y, x);
-
-            if (vx == 0 && vy == 0)
-                return;
-
-            const float mag = ::sqrtf(vx * vx + vy * vy);
-
-            const int x0 = __float2int_rn((x * idp) * ONE);
-            const int y0 = __float2int_rn((y * idp) * ONE);
-
-            int sx = __float2int_rn((vx * idp) * ONE / mag);
-            int sy = __float2int_rn((vy * idp) * ONE / mag);
-
-            // Step from minRadius to maxRadius in both directions of the gradient
-            for (int k1 = 0; k1 < 2; ++k1)
-            {
-                int x1 = x0 + minRadius * sx;
-                int y1 = y0 + minRadius * sy;
-
-                for (int r = minRadius; r <= maxRadius; x1 += sx, y1 += sy, ++r)
-                {
-                    const int x2 = x1 >> SHIFT;
-                    const int y2 = y1 >> SHIFT;
-
-                    if (x2 < 0 || x2 >= width || y2 < 0 || y2 >= height)
-                        break;
-
-                    ::atomicAdd(accum.ptr(y2 + 1) + x2 + 1, 1);
-                }
-
-                sx = -sx;
-                sy = -sy;
-            }
-        }
-
-        void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp)
-        {
-            const dim3 block(256);
-            const dim3 grid(divUp(count, block.x));
-
-            cudaSafeCall( cudaFuncSetCacheConfig(circlesAccumCenters, cudaFuncCachePreferL1) );
-
-            circlesAccumCenters<<<grid, block>>>(list, count, dx, dy, accum, accum.cols - 2, accum.rows - 2, minRadius, maxRadius, idp);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-        ////////////////////////////////////////////////////////////////////////
-        // buildCentersList
-
-        __global__ void buildCentersList(const PtrStepSzi accum, unsigned int* centers, const int threshold)
-        {
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (x < accum.cols - 2 && y < accum.rows - 2)
-            {
-                const int top = accum(y, x + 1);
-
-                const int left = accum(y + 1, x);
-                const int cur = accum(y + 1, x + 1);
-                const int right = accum(y + 1, x + 2);
-
-                const int bottom = accum(y + 2, x + 1);
-
-                if (cur > threshold && cur > top && cur >= bottom && cur > left && cur >= right)
-                {
-                    const unsigned int val = (y << 16) | x;
-                    const int idx = ::atomicAdd(&g_counter, 1);
-                    centers[idx] = val;
-                }
-            }
-        }
-
-        int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold)
-        {
-            void* counterPtr;
-            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-
-            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
-
-            const dim3 block(32, 8);
-            const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
-
-            cudaSafeCall( cudaFuncSetCacheConfig(buildCentersList, cudaFuncCachePreferL1) );
-
-            buildCentersList<<<grid, block>>>(accum, centers, threshold);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-
-            int totalCount;
-            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
-
-            return totalCount;
-        }
-
-        ////////////////////////////////////////////////////////////////////////
-        // circlesAccumRadius
-
-        __global__ void circlesAccumRadius(const unsigned int* centers, const unsigned int* list, const int count,
-                                           float3* circles, const int maxCircles, const float dp,
-                                           const int minRadius, const int maxRadius, const int histSize, const int threshold)
-        {
-            int* smem = DynamicSharedMem<int>();
-
-            for (int i = threadIdx.x; i < histSize + 2; i += blockDim.x)
-                smem[i] = 0;
-            __syncthreads();
-
-            unsigned int val = centers[blockIdx.x];
-
-            float cx = (val & 0xFFFF);
-            float cy = (val >> 16) & 0xFFFF;
-
-            cx = (cx + 0.5f) * dp;
-            cy = (cy + 0.5f) * dp;
-
-            for (int i = threadIdx.x; i < count; i += blockDim.x)
-            {
-                val = list[i];
-
-                const int x = (val & 0xFFFF);
-                const int y = (val >> 16) & 0xFFFF;
-
-                const float rad = ::sqrtf((cx - x) * (cx - x) + (cy - y) * (cy - y));
-                if (rad >= minRadius && rad <= maxRadius)
-                {
-                    const int r = __float2int_rn(rad - minRadius);
-
-                    Emulation::smem::atomicAdd(&smem[r + 1], 1);
-                }
-            }
-
-            __syncthreads();
-
-            for (int i = threadIdx.x; i < histSize; i += blockDim.x)
-            {
-                const int curVotes = smem[i + 1];
-
-                if (curVotes >= threshold && curVotes > smem[i] && curVotes >= smem[i + 2])
-                {
-                    const int ind = ::atomicAdd(&g_counter, 1);
-                    if (ind < maxCircles)
-                        circles[ind] = make_float3(cx, cy, i + minRadius);
-                }
-            }
-        }
-
-        int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count,
-                                   float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20)
-        {
-            void* counterPtr;
-            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-
-            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
-
-            const dim3 block(has20 ? 1024 : 512);
-            const dim3 grid(centersCount);
-
-            const int histSize = maxRadius - minRadius + 1;
-            size_t smemSize = (histSize + 2) * sizeof(int);
-
-            circlesAccumRadius<<<grid, block, smemSize>>>(centers, list, count, circles, maxCircles, dp, minRadius, maxRadius, histSize, threshold);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-
-            int totalCount;
-            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
-
-            totalCount = ::min(totalCount, maxCircles);
-
-            return totalCount;
-        }
-
-        ////////////////////////////////////////////////////////////////////////
-        // Generalized Hough
-
-        template <typename T, int PIXELS_PER_THREAD>
-        __global__ void buildEdgePointList(const PtrStepSzb edges, const PtrStep<T> dx, const PtrStep<T> dy, unsigned int* coordList, float* thetaList)
-        {
-            __shared__ unsigned int s_coordLists[4][32 * PIXELS_PER_THREAD];
-            __shared__ float s_thetaLists[4][32 * PIXELS_PER_THREAD];
-            __shared__ int s_sizes[4];
-            __shared__ int s_globStart[4];
-
-            const int x = blockIdx.x * blockDim.x * PIXELS_PER_THREAD + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (threadIdx.x == 0)
-                s_sizes[threadIdx.y] = 0;
-            __syncthreads();
-
-            if (y < edges.rows)
-            {
-                // fill the queue
-                const uchar* edgesRow = edges.ptr(y);
-                const T* dxRow = dx.ptr(y);
-                const T* dyRow = dy.ptr(y);
-
-                for (int i = 0, xx = x; i < PIXELS_PER_THREAD && xx < edges.cols; ++i, xx += blockDim.x)
-                {
-                    const T dxVal = dxRow[xx];
-                    const T dyVal = dyRow[xx];
-
-                    if (edgesRow[xx] && (dxVal != 0 || dyVal != 0))
-                    {
-                        const unsigned int coord = (y << 16) | xx;
-
-                        float theta = ::atan2f(dyVal, dxVal);
-                        if (theta < 0)
-                            theta += 2.0f * CV_PI_F;
-
-                        const int qidx = Emulation::smem::atomicAdd(&s_sizes[threadIdx.y], 1);
-
-                        s_coordLists[threadIdx.y][qidx] = coord;
-                        s_thetaLists[threadIdx.y][qidx] = theta;
-                    }
-                }
-            }
-
-            __syncthreads();
-
-            // let one thread reserve the space required in the global list
-            if (threadIdx.x == 0 && threadIdx.y == 0)
-            {
-                // find how many items are stored in each list
-                int totalSize = 0;
-                for (int i = 0; i < blockDim.y; ++i)
-                {
-                    s_globStart[i] = totalSize;
-                    totalSize += s_sizes[i];
-                }
-
-                // calculate the offset in the global list
-                const int globalOffset = atomicAdd(&g_counter, totalSize);
-                for (int i = 0; i < blockDim.y; ++i)
-                    s_globStart[i] += globalOffset;
-            }
-
-            __syncthreads();
-
-            // copy local queues to global queue
-            const int qsize = s_sizes[threadIdx.y];
-            int gidx = s_globStart[threadIdx.y] + threadIdx.x;
-            for(int i = threadIdx.x; i < qsize; i += blockDim.x, gidx += blockDim.x)
-            {
-                coordList[gidx] = s_coordLists[threadIdx.y][i];
-                thetaList[gidx] = s_thetaLists[threadIdx.y][i];
-            }
-        }
-
-        template <typename T>
-        int buildEdgePointList_gpu(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList)
-        {
-            const int PIXELS_PER_THREAD = 8;
-
-            void* counterPtr;
-            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-
-            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
-
-            const dim3 block(32, 4);
-            const dim3 grid(divUp(edges.cols, block.x * PIXELS_PER_THREAD), divUp(edges.rows, block.y));
-
-            cudaSafeCall( cudaFuncSetCacheConfig(buildEdgePointList<T, PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
-
-            buildEdgePointList<T, PIXELS_PER_THREAD><<<grid, block>>>(edges, (PtrStepSz<T>) dx, (PtrStepSz<T>) dy, coordList, thetaList);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-
-            int totalCount;
-            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
-
-            return totalCount;
-        }
cudaMemcpyDeviceToHost) ); - - return totalCount; - } - - template int buildEdgePointList_gpu(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList); - template int buildEdgePointList_gpu(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList); - template int buildEdgePointList_gpu(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList); - - __global__ void buildRTable(const unsigned int* coordList, const float* thetaList, const int pointsCount, - PtrStep r_table, int* r_sizes, int maxSize, - const short2 templCenter, const float thetaScale) - { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - - if (tid >= pointsCount) - return; - - const unsigned int coord = coordList[tid]; - short2 p; - p.x = (coord & 0xFFFF); - p.y = (coord >> 16) & 0xFFFF; - - const float theta = thetaList[tid]; - const int n = __float2int_rn(theta * thetaScale); - - const int ind = ::atomicAdd(r_sizes + n, 1); - if (ind < maxSize) - r_table(n, ind) = saturate_cast(p - templCenter); - } - - void buildRTable_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, - PtrStepSz r_table, int* r_sizes, - short2 templCenter, int levels) - { - const dim3 block(256); - const dim3 grid(divUp(pointsCount, block.x)); - - const float thetaScale = levels / (2.0f * CV_PI_F); - - buildRTable<<>>(coordList, thetaList, pointsCount, r_table, r_sizes, r_table.cols, templCenter, thetaScale); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - - //////////////////////////////////////////////////////////////////////// - // GHT_Ballard_Pos - - __global__ void GHT_Ballard_Pos_calcHist(const unsigned int* coordList, const float* thetaList, const int pointsCount, - const PtrStep r_table, const int* r_sizes, - PtrStepSzi hist, - const float idp, const float thetaScale) - { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - - if (tid >= pointsCount) - return; - - const unsigned int coord = coordList[tid]; - short2 p; - p.x = (coord & 0xFFFF); - p.y = (coord >> 16) & 0xFFFF; - - const float theta = thetaList[tid]; - const int n = __float2int_rn(theta * thetaScale); - - const short2* r_row = r_table.ptr(n); - const int r_row_size = r_sizes[n]; - - for (int j = 0; j < r_row_size; ++j) - { - int2 c = p - r_row[j]; - - c.x = __float2int_rn(c.x * idp); - c.y = __float2int_rn(c.y * idp); - - if (c.x >= 0 && c.x < hist.cols - 2 && c.y >= 0 && c.y < hist.rows - 2) - ::atomicAdd(hist.ptr(c.y + 1) + c.x + 1, 1); - } - } - - void GHT_Ballard_Pos_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, - PtrStepSz r_table, const int* r_sizes, - PtrStepSzi hist, - float dp, int levels) - { - const dim3 block(256); - const dim3 grid(divUp(pointsCount, block.x)); - - const float idp = 1.0f / dp; - const float thetaScale = levels / (2.0f * CV_PI_F); - - GHT_Ballard_Pos_calcHist<<>>(coordList, thetaList, pointsCount, r_table, r_sizes, hist, idp, thetaScale); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - - __global__ void GHT_Ballard_Pos_findPosInHist(const PtrStepSzi hist, float4* out, int3* votes, const int maxSize, const float dp, const int threshold) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x >= hist.cols - 2 || y >= hist.rows - 2) - return; - - const int curVotes = hist(y + 1, x + 1); - - if (curVotes > threshold && - curVotes > 
hist(y + 1, x) && - curVotes >= hist(y + 1, x + 2) && - curVotes > hist(y, x + 1) && - curVotes >= hist(y + 2, x + 1)) - { - const int ind = ::atomicAdd(&g_counter, 1); - - if (ind < maxSize) - { - out[ind] = make_float4(x * dp, y * dp, 1.0f, 0.0f); - votes[ind] = make_int3(curVotes, 0, 0); - } - } - } - - int GHT_Ballard_Pos_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int maxSize, float dp, int threshold) - { - void* counterPtr; - cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); - - cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); - - const dim3 block(32, 8); - const dim3 grid(divUp(hist.cols - 2, block.x), divUp(hist.rows - 2, block.y)); - - cudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_Pos_findPosInHist, cudaFuncCachePreferL1) ); - - GHT_Ballard_Pos_findPosInHist<<>>(hist, out, votes, maxSize, dp, threshold); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - - int totalCount; - cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); - - totalCount = ::min(totalCount, maxSize); - - return totalCount; - } - - //////////////////////////////////////////////////////////////////////// - // GHT_Ballard_PosScale - - __global__ void GHT_Ballard_PosScale_calcHist(const unsigned int* coordList, const float* thetaList, - PtrStep r_table, const int* r_sizes, - PtrStepi hist, const int rows, const int cols, - const float minScale, const float scaleStep, const int scaleRange, - const float idp, const float thetaScale) - { - const unsigned int coord = coordList[blockIdx.x]; - float2 p; - p.x = (coord & 0xFFFF); - p.y = (coord >> 16) & 0xFFFF; - - const float theta = thetaList[blockIdx.x]; - const int n = __float2int_rn(theta * thetaScale); - - const short2* r_row = r_table.ptr(n); - const int r_row_size = r_sizes[n]; - - for (int j = 0; j < r_row_size; ++j) - { - const float2 d = saturate_cast(r_row[j]); - - for (int s = threadIdx.x; s < scaleRange; s += blockDim.x) - { - const float scale = minScale + s * scaleStep; - - float2 c = p - scale * d; - - c.x *= idp; - c.y *= idp; - - if (c.x >= 0 && c.x < cols && c.y >= 0 && c.y < rows) - ::atomicAdd(hist.ptr((s + 1) * (rows + 2) + __float2int_rn(c.y + 1)) + __float2int_rn(c.x + 1), 1); - } - } - } - - void GHT_Ballard_PosScale_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, - PtrStepSz r_table, const int* r_sizes, - PtrStepi hist, int rows, int cols, - float minScale, float scaleStep, int scaleRange, - float dp, int levels) - { - const dim3 block(256); - const dim3 grid(pointsCount); - - const float idp = 1.0f / dp; - const float thetaScale = levels / (2.0f * CV_PI_F); - - GHT_Ballard_PosScale_calcHist<<>>(coordList, thetaList, - r_table, r_sizes, - hist, rows, cols, - minScale, scaleStep, scaleRange, - idp, thetaScale); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - - __global__ void GHT_Ballard_PosScale_findPosInHist(const PtrStepi hist, const int rows, const int cols, const int scaleRange, - float4* out, int3* votes, const int maxSize, - const float minScale, const float scaleStep, const float dp, const int threshold) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x >= cols || y >= rows) - return; - - for (int s = 0; s < scaleRange; ++s) - { - const float scale = minScale + s * scaleStep; - - const int prevScaleIdx = (s) * (rows + 2); - const int curScaleIdx = (s + 1) * (rows + 2); - const int 
nextScaleIdx = (s + 2) * (rows + 2); - - const int curVotes = hist(curScaleIdx + y + 1, x + 1); - - if (curVotes > threshold && - curVotes > hist(curScaleIdx + y + 1, x) && - curVotes >= hist(curScaleIdx + y + 1, x + 2) && - curVotes > hist(curScaleIdx + y, x + 1) && - curVotes >= hist(curScaleIdx + y + 2, x + 1) && - curVotes > hist(prevScaleIdx + y + 1, x + 1) && - curVotes >= hist(nextScaleIdx + y + 1, x + 1)) - { - const int ind = ::atomicAdd(&g_counter, 1); - - if (ind < maxSize) - { - out[ind] = make_float4(x * dp, y * dp, scale, 0.0f); - votes[ind] = make_int3(curVotes, curVotes, 0); - } - } - } - } - - int GHT_Ballard_PosScale_findPosInHist_gpu(PtrStepi hist, int rows, int cols, int scaleRange, float4* out, int3* votes, int maxSize, - float minScale, float scaleStep, float dp, int threshold) - { - void* counterPtr; - cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); - - cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); - - const dim3 block(32, 8); - const dim3 grid(divUp(cols, block.x), divUp(rows, block.y)); - - cudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_PosScale_findPosInHist, cudaFuncCachePreferL1) ); - - GHT_Ballard_PosScale_findPosInHist<<>>(hist, rows, cols, scaleRange, out, votes, maxSize, minScale, scaleStep, dp, threshold); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - - int totalCount; - cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); - - totalCount = ::min(totalCount, maxSize); - - return totalCount; - } - - //////////////////////////////////////////////////////////////////////// - // GHT_Ballard_PosRotation - - __global__ void GHT_Ballard_PosRotation_calcHist(const unsigned int* coordList, const float* thetaList, - PtrStep r_table, const int* r_sizes, - PtrStepi hist, const int rows, const int cols, - const float minAngle, const float angleStep, const int angleRange, - const float idp, const float thetaScale) - { - const unsigned int coord = coordList[blockIdx.x]; - float2 p; - p.x = (coord & 0xFFFF); - p.y = (coord >> 16) & 0xFFFF; - - const float thetaVal = thetaList[blockIdx.x]; - - for (int a = threadIdx.x; a < angleRange; a += blockDim.x) - { - const float angle = (minAngle + a * angleStep) * (CV_PI_F / 180.0f); - float sinA, cosA; - sincosf(angle, &sinA, &cosA); - - float theta = thetaVal - angle; - if (theta < 0) - theta += 2.0f * CV_PI_F; - - const int n = __float2int_rn(theta * thetaScale); - - const short2* r_row = r_table.ptr(n); - const int r_row_size = r_sizes[n]; - - for (int j = 0; j < r_row_size; ++j) - { - const float2 d = saturate_cast(r_row[j]); - - const float2 dr = make_float2(d.x * cosA - d.y * sinA, d.x * sinA + d.y * cosA); - - float2 c = make_float2(p.x - dr.x, p.y - dr.y); - c.x *= idp; - c.y *= idp; - - if (c.x >= 0 && c.x < cols && c.y >= 0 && c.y < rows) - ::atomicAdd(hist.ptr((a + 1) * (rows + 2) + __float2int_rn(c.y + 1)) + __float2int_rn(c.x + 1), 1); - } - } - } - - void GHT_Ballard_PosRotation_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, - PtrStepSz r_table, const int* r_sizes, - PtrStepi hist, int rows, int cols, - float minAngle, float angleStep, int angleRange, - float dp, int levels) - { - const dim3 block(256); - const dim3 grid(pointsCount); - - const float idp = 1.0f / dp; - const float thetaScale = levels / (2.0f * CV_PI_F); - - GHT_Ballard_PosRotation_calcHist<<>>(coordList, thetaList, - r_table, r_sizes, - hist, rows, cols, - minAngle, angleStep, angleRange, - idp, thetaScale); - 
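// [editorial note, not part of the patch] The hist buffer filled above is a stack of
// (angleRange + 2) slices, each (rows + 2) x (cols + 2): one slice per angle bin plus a
// guard slice at either end, with a one-cell border around every slice, so the
// findPosInHist kernel below can compare a bin against all six neighbours (4 spatial,
// prev/next angle) without bounds checks. A host-side sizing sketch (hypothetical,
// mirroring the ensureSizeIsEnough pattern used on the .cpp side):
//
//     cv::gpu::GpuMat hist;
//     cv::gpu::ensureSizeIsEnough((angleRange + 2) * (rows + 2), cols + 2, CV_32SC1, hist);
//     hist.setTo(cv::Scalar::all(0));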
cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - - __global__ void GHT_Ballard_PosRotation_findPosInHist(const PtrStepi hist, const int rows, const int cols, const int angleRange, - float4* out, int3* votes, const int maxSize, - const float minAngle, const float angleStep, const float dp, const int threshold) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x >= cols || y >= rows) - return; - - for (int a = 0; a < angleRange; ++a) - { - const float angle = minAngle + a * angleStep; - - const int prevAngleIdx = (a) * (rows + 2); - const int curAngleIdx = (a + 1) * (rows + 2); - const int nextAngleIdx = (a + 2) * (rows + 2); - - const int curVotes = hist(curAngleIdx + y + 1, x + 1); - - if (curVotes > threshold && - curVotes > hist(curAngleIdx + y + 1, x) && - curVotes >= hist(curAngleIdx + y + 1, x + 2) && - curVotes > hist(curAngleIdx + y, x + 1) && - curVotes >= hist(curAngleIdx + y + 2, x + 1) && - curVotes > hist(prevAngleIdx + y + 1, x + 1) && - curVotes >= hist(nextAngleIdx + y + 1, x + 1)) - { - const int ind = ::atomicAdd(&g_counter, 1); - - if (ind < maxSize) - { - out[ind] = make_float4(x * dp, y * dp, 1.0f, angle); - votes[ind] = make_int3(curVotes, 0, curVotes); - } - } - } - } - - int GHT_Ballard_PosRotation_findPosInHist_gpu(PtrStepi hist, int rows, int cols, int angleRange, float4* out, int3* votes, int maxSize, - float minAngle, float angleStep, float dp, int threshold) - { - void* counterPtr; - cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); - - cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); - - const dim3 block(32, 8); - const dim3 grid(divUp(cols, block.x), divUp(rows, block.y)); - - cudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_PosRotation_findPosInHist, cudaFuncCachePreferL1) ); - - GHT_Ballard_PosRotation_findPosInHist<<>>(hist, rows, cols, angleRange, out, votes, maxSize, minAngle, angleStep, dp, threshold); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - - int totalCount; - cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); - - totalCount = ::min(totalCount, maxSize); - - return totalCount; - } - - //////////////////////////////////////////////////////////////////////// - // GHT_Guil_Full - - struct FeatureTable - { - uchar* p1_pos_data; - size_t p1_pos_step; - - uchar* p1_theta_data; - size_t p1_theta_step; - - uchar* p2_pos_data; - size_t p2_pos_step; - - uchar* d12_data; - size_t d12_step; - - uchar* r1_data; - size_t r1_step; - - uchar* r2_data; - size_t r2_step; - }; - - __constant__ FeatureTable c_templFeatures; - __constant__ FeatureTable c_imageFeatures; - - void GHT_Guil_Full_setTemplFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2) - { - FeatureTable tbl; - - tbl.p1_pos_data = p1_pos.data; - tbl.p1_pos_step = p1_pos.step; - - tbl.p1_theta_data = p1_theta.data; - tbl.p1_theta_step = p1_theta.step; - - tbl.p2_pos_data = p2_pos.data; - tbl.p2_pos_step = p2_pos.step; - - tbl.d12_data = d12.data; - tbl.d12_step = d12.step; - - tbl.r1_data = r1.data; - tbl.r1_step = r1.step; - - tbl.r2_data = r2.data; - tbl.r2_step = r2.step; - - cudaSafeCall( cudaMemcpyToSymbol(c_templFeatures, &tbl, sizeof(FeatureTable)) ); - } - void GHT_Guil_Full_setImageFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2) - { - FeatureTable tbl; - - tbl.p1_pos_data = p1_pos.data; - 
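// [editorial note] setTemplFeatures and setImageFeatures fill the identical FeatureTable
// layout and upload it to two separate __constant__ slots (c_templFeatures and
// c_imageFeatures), so the Guil kernels can be instantiated once per table via the
// accessor structs below. Each accessor simply rebuilds a pitched row pointer, e.g.
// (float2*)(p1_pos_data + n * p1_pos_step), which is the standard data/step addressing
// of a GpuMat row on the device.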
tbl.p1_pos_step = p1_pos.step; - - tbl.p1_theta_data = p1_theta.data; - tbl.p1_theta_step = p1_theta.step; - - tbl.p2_pos_data = p2_pos.data; - tbl.p2_pos_step = p2_pos.step; - - tbl.d12_data = d12.data; - tbl.d12_step = d12.step; - - tbl.r1_data = r1.data; - tbl.r1_step = r1.step; - - tbl.r2_data = r2.data; - tbl.r2_step = r2.step; - - cudaSafeCall( cudaMemcpyToSymbol(c_imageFeatures, &tbl, sizeof(FeatureTable)) ); - } - - struct TemplFeatureTable - { - static __device__ float2* p1_pos(int n) - { - return (float2*)(c_templFeatures.p1_pos_data + n * c_templFeatures.p1_pos_step); - } - static __device__ float* p1_theta(int n) - { - return (float*)(c_templFeatures.p1_theta_data + n * c_templFeatures.p1_theta_step); - } - static __device__ float2* p2_pos(int n) - { - return (float2*)(c_templFeatures.p2_pos_data + n * c_templFeatures.p2_pos_step); - } - - static __device__ float* d12(int n) - { - return (float*)(c_templFeatures.d12_data + n * c_templFeatures.d12_step); - } - - static __device__ float2* r1(int n) - { - return (float2*)(c_templFeatures.r1_data + n * c_templFeatures.r1_step); - } - static __device__ float2* r2(int n) - { - return (float2*)(c_templFeatures.r2_data + n * c_templFeatures.r2_step); - } - }; - struct ImageFeatureTable - { - static __device__ float2* p1_pos(int n) - { - return (float2*)(c_imageFeatures.p1_pos_data + n * c_imageFeatures.p1_pos_step); - } - static __device__ float* p1_theta(int n) - { - return (float*)(c_imageFeatures.p1_theta_data + n * c_imageFeatures.p1_theta_step); - } - static __device__ float2* p2_pos(int n) - { - return (float2*)(c_imageFeatures.p2_pos_data + n * c_imageFeatures.p2_pos_step); - } - - static __device__ float* d12(int n) - { - return (float*)(c_imageFeatures.d12_data + n * c_imageFeatures.d12_step); - } - - static __device__ float2* r1(int n) - { - return (float2*)(c_imageFeatures.r1_data + n * c_imageFeatures.r1_step); - } - static __device__ float2* r2(int n) - { - return (float2*)(c_imageFeatures.r2_data + n * c_imageFeatures.r2_step); - } - }; - - __device__ float clampAngle(float a) - { - float res = a; - - while (res > 2.0f * CV_PI_F) - res -= 2.0f * CV_PI_F; - while (res < 0.0f) - res += 2.0f * CV_PI_F; - - return res; - } - - __device__ bool angleEq(float a, float b, float eps) - { - return (::fabs(clampAngle(a - b)) <= eps); - } - - template - __global__ void GHT_Guil_Full_buildFeatureList(const unsigned int* coordList, const float* thetaList, const int pointsCount, - int* sizes, const int maxSize, - const float xi, const float angleEpsilon, const float alphaScale, - const float2 center, const float maxDist) - { - const float p1_theta = thetaList[blockIdx.x]; - const unsigned int coord1 = coordList[blockIdx.x]; - float2 p1_pos; - p1_pos.x = (coord1 & 0xFFFF); - p1_pos.y = (coord1 >> 16) & 0xFFFF; - - for (int i = threadIdx.x; i < pointsCount; i += blockDim.x) - { - const float p2_theta = thetaList[i]; - const unsigned int coord2 = coordList[i]; - float2 p2_pos; - p2_pos.x = (coord2 & 0xFFFF); - p2_pos.y = (coord2 >> 16) & 0xFFFF; - - if (angleEq(p1_theta - p2_theta, xi, angleEpsilon)) - { - const float2 d = p1_pos - p2_pos; - - float alpha12 = clampAngle(::atan2(d.y, d.x) - p1_theta); - float d12 = ::sqrtf(d.x * d.x + d.y * d.y); - - if (d12 > maxDist) - continue; - - float2 r1 = p1_pos - center; - float2 r2 = p2_pos - center; - - const int n = __float2int_rn(alpha12 * alphaScale); - - const int ind = ::atomicAdd(sizes + n, 1); - - if (ind < maxSize) - { - if (!isTempl) - { - FT::p1_pos(n)[ind] = p1_pos; - 
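// [editorial note] The template/image asymmetry here is deliberate: only image features
// keep the absolute pair positions (p1_pos, p2_pos), which the position-voting step
// needs, while only template features keep the r1/r2 vectors (offsets from the template
// centre) that are rotated and scaled during voting -- hence the isTempl branches
// around these stores.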
FT::p2_pos(n)[ind] = p2_pos; - } - - FT::p1_theta(n)[ind] = p1_theta; - - FT::d12(n)[ind] = d12; - - if (isTempl) - { - FT::r1(n)[ind] = r1; - FT::r2(n)[ind] = r2; - } - } - } - } - } - - template - void GHT_Guil_Full_buildFeatureList_caller(const unsigned int* coordList, const float* thetaList, int pointsCount, - int* sizes, int maxSize, - float xi, float angleEpsilon, int levels, - float2 center, float maxDist) - { - const dim3 block(256); - const dim3 grid(pointsCount); - - const float alphaScale = levels / (2.0f * CV_PI_F); - - GHT_Guil_Full_buildFeatureList<<>>(coordList, thetaList, pointsCount, - sizes, maxSize, - xi * (CV_PI_F / 180.0f), angleEpsilon * (CV_PI_F / 180.0f), alphaScale, - center, maxDist); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - - thrust::device_ptr sizesPtr(sizes); - thrust::transform(sizesPtr, sizesPtr + levels + 1, sizesPtr, cudev::bind2nd(cudev::minimum(), maxSize)); - } - - void GHT_Guil_Full_buildTemplFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, - int* sizes, int maxSize, - float xi, float angleEpsilon, int levels, - float2 center, float maxDist) - { - GHT_Guil_Full_buildFeatureList_caller(coordList, thetaList, pointsCount, - sizes, maxSize, - xi, angleEpsilon, levels, - center, maxDist); - } - void GHT_Guil_Full_buildImageFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, - int* sizes, int maxSize, - float xi, float angleEpsilon, int levels, - float2 center, float maxDist) - { - GHT_Guil_Full_buildFeatureList_caller(coordList, thetaList, pointsCount, - sizes, maxSize, - xi, angleEpsilon, levels, - center, maxDist); - } - - __global__ void GHT_Guil_Full_calcOHist(const int* templSizes, const int* imageSizes, int* OHist, - const float minAngle, const float maxAngle, const float iAngleStep, const int angleRange) - { - extern __shared__ int s_OHist[]; - for (int i = threadIdx.x; i <= angleRange; i += blockDim.x) - s_OHist[i] = 0; - __syncthreads(); - - const int tIdx = blockIdx.x; - const int level = blockIdx.y; - - const int tSize = templSizes[level]; - - if (tIdx < tSize) - { - const int imSize = imageSizes[level]; - - const float t_p1_theta = TemplFeatureTable::p1_theta(level)[tIdx]; - - for (int i = threadIdx.x; i < imSize; i += blockDim.x) - { - const float im_p1_theta = ImageFeatureTable::p1_theta(level)[i]; - - const float angle = clampAngle(im_p1_theta - t_p1_theta); - - if (angle >= minAngle && angle <= maxAngle) - { - const int n = __float2int_rn((angle - minAngle) * iAngleStep); - Emulation::smem::atomicAdd(&s_OHist[n], 1); - } - } - } - __syncthreads(); - - for (int i = threadIdx.x; i <= angleRange; i += blockDim.x) - ::atomicAdd(OHist + i, s_OHist[i]); - } - - void GHT_Guil_Full_calcOHist_gpu(const int* templSizes, const int* imageSizes, int* OHist, - float minAngle, float maxAngle, float angleStep, int angleRange, - int levels, int tMaxSize) - { - const dim3 block(256); - const dim3 grid(tMaxSize, levels + 1); - - minAngle *= (CV_PI_F / 180.0f); - maxAngle *= (CV_PI_F / 180.0f); - angleStep *= (CV_PI_F / 180.0f); - - const size_t smemSize = (angleRange + 1) * sizeof(float); - - GHT_Guil_Full_calcOHist<<>>(templSizes, imageSizes, OHist, - minAngle, maxAngle, 1.0f / angleStep, angleRange); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - - __global__ void GHT_Guil_Full_calcSHist(const int* templSizes, const int* imageSizes, int* SHist, - const float angle, const float angleEpsilon, - const 
float minScale, const float maxScale, const float iScaleStep, const int scaleRange) - { - extern __shared__ int s_SHist[]; - for (int i = threadIdx.x; i <= scaleRange; i += blockDim.x) - s_SHist[i] = 0; - __syncthreads(); - - const int tIdx = blockIdx.x; - const int level = blockIdx.y; - - const int tSize = templSizes[level]; - - if (tIdx < tSize) - { - const int imSize = imageSizes[level]; - - const float t_p1_theta = TemplFeatureTable::p1_theta(level)[tIdx] + angle; - const float t_d12 = TemplFeatureTable::d12(level)[tIdx] + angle; - - for (int i = threadIdx.x; i < imSize; i += blockDim.x) - { - const float im_p1_theta = ImageFeatureTable::p1_theta(level)[i]; - const float im_d12 = ImageFeatureTable::d12(level)[i]; - - if (angleEq(im_p1_theta, t_p1_theta, angleEpsilon)) - { - const float scale = im_d12 / t_d12; - - if (scale >= minScale && scale <= maxScale) - { - const int s = __float2int_rn((scale - minScale) * iScaleStep); - Emulation::smem::atomicAdd(&s_SHist[s], 1); - } - } - } - } - __syncthreads(); - - for (int i = threadIdx.x; i <= scaleRange; i += blockDim.x) - ::atomicAdd(SHist + i, s_SHist[i]); - } - - void GHT_Guil_Full_calcSHist_gpu(const int* templSizes, const int* imageSizes, int* SHist, - float angle, float angleEpsilon, - float minScale, float maxScale, float iScaleStep, int scaleRange, - int levels, int tMaxSize) - { - const dim3 block(256); - const dim3 grid(tMaxSize, levels + 1); - - angle *= (CV_PI_F / 180.0f); - angleEpsilon *= (CV_PI_F / 180.0f); - - const size_t smemSize = (scaleRange + 1) * sizeof(float); - - GHT_Guil_Full_calcSHist<<>>(templSizes, imageSizes, SHist, - angle, angleEpsilon, - minScale, maxScale, iScaleStep, scaleRange); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - - __global__ void GHT_Guil_Full_calcPHist(const int* templSizes, const int* imageSizes, PtrStepSzi PHist, - const float angle, const float sinVal, const float cosVal, const float angleEpsilon, const float scale, - const float idp) - { - const int tIdx = blockIdx.x; - const int level = blockIdx.y; - - const int tSize = templSizes[level]; - - if (tIdx < tSize) - { - const int imSize = imageSizes[level]; - - const float t_p1_theta = TemplFeatureTable::p1_theta(level)[tIdx] + angle; - - float2 r1 = TemplFeatureTable::r1(level)[tIdx]; - float2 r2 = TemplFeatureTable::r2(level)[tIdx]; - - r1 = r1 * scale; - r2 = r2 * scale; - - r1 = make_float2(cosVal * r1.x - sinVal * r1.y, sinVal * r1.x + cosVal * r1.y); - r2 = make_float2(cosVal * r2.x - sinVal * r2.y, sinVal * r2.x + cosVal * r2.y); - - for (int i = threadIdx.x; i < imSize; i += blockDim.x) - { - const float im_p1_theta = ImageFeatureTable::p1_theta(level)[i]; - - const float2 im_p1_pos = ImageFeatureTable::p1_pos(level)[i]; - const float2 im_p2_pos = ImageFeatureTable::p2_pos(level)[i]; - - if (angleEq(im_p1_theta, t_p1_theta, angleEpsilon)) - { - float2 c1, c2; - - c1 = im_p1_pos - r1; - c1 = c1 * idp; - - c2 = im_p2_pos - r2; - c2 = c2 * idp; - - if (::fabs(c1.x - c2.x) > 1 || ::fabs(c1.y - c2.y) > 1) - continue; - - if (c1.y >= 0 && c1.y < PHist.rows - 2 && c1.x >= 0 && c1.x < PHist.cols - 2) - ::atomicAdd(PHist.ptr(__float2int_rn(c1.y) + 1) + __float2int_rn(c1.x) + 1, 1); - } - } - } - } - - void GHT_Guil_Full_calcPHist_gpu(const int* templSizes, const int* imageSizes, PtrStepSzi PHist, - float angle, float angleEpsilon, float scale, - float dp, - int levels, int tMaxSize) - { - const dim3 block(256); - const dim3 grid(tMaxSize, levels + 1); - - angle *= (CV_PI_F / 180.0f); - angleEpsilon *= 
(CV_PI_F / 180.0f); - - const float sinVal = ::sinf(angle); - const float cosVal = ::cosf(angle); - - cudaSafeCall( cudaFuncSetCacheConfig(GHT_Guil_Full_calcPHist, cudaFuncCachePreferL1) ); - - GHT_Guil_Full_calcPHist<<>>(templSizes, imageSizes, PHist, - angle, sinVal, cosVal, angleEpsilon, scale, - 1.0f / dp); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - - __global__ void GHT_Guil_Full_findPosInHist(const PtrStepSzi hist, float4* out, int3* votes, const int maxSize, - const float angle, const int angleVotes, const float scale, const int scaleVotes, - const float dp, const int threshold) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x >= hist.cols - 2 || y >= hist.rows - 2) - return; - - const int curVotes = hist(y + 1, x + 1); - - if (curVotes > threshold && - curVotes > hist(y + 1, x) && - curVotes >= hist(y + 1, x + 2) && - curVotes > hist(y, x + 1) && - curVotes >= hist(y + 2, x + 1)) - { - const int ind = ::atomicAdd(&g_counter, 1); - - if (ind < maxSize) - { - out[ind] = make_float4(x * dp, y * dp, scale, angle); - votes[ind] = make_int3(curVotes, scaleVotes, angleVotes); - } - } - } - - int GHT_Guil_Full_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int curSize, int maxSize, - float angle, int angleVotes, float scale, int scaleVotes, - float dp, int threshold) - { - void* counterPtr; - cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); - - cudaSafeCall( cudaMemcpy(counterPtr, &curSize, sizeof(int), cudaMemcpyHostToDevice) ); - - const dim3 block(32, 8); - const dim3 grid(divUp(hist.cols - 2, block.x), divUp(hist.rows - 2, block.y)); - - cudaSafeCall( cudaFuncSetCacheConfig(GHT_Guil_Full_findPosInHist, cudaFuncCachePreferL1) ); - - GHT_Guil_Full_findPosInHist<<>>(hist, out, votes, maxSize, - angle, angleVotes, scale, scaleVotes, - dp, threshold); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - - int totalCount; - cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); - - totalCount = ::min(totalCount, maxSize); - - return totalCount; - } - } -}}} - - -#endif /* CUDA_DISABLER */ diff --git a/modules/gpuimgproc/src/cuda/hough_circles.cu b/modules/gpuimgproc/src/cuda/hough_circles.cu new file mode 100644 index 000000000..1b6b10336 --- /dev/null +++ b/modules/gpuimgproc/src/cuda/hough_circles.cu @@ -0,0 +1,255 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#if !defined CUDA_DISABLER + +#include "opencv2/core/cuda/common.hpp" +#include "opencv2/core/cuda/emulation.hpp" +#include "opencv2/core/cuda/dynamic_smem.hpp" + +namespace cv { namespace gpu { namespace cudev +{ + namespace hough_circles + { + __device__ int g_counter; + + //////////////////////////////////////////////////////////////////////// + // circlesAccumCenters + + __global__ void circlesAccumCenters(const unsigned int* list, const int count, const PtrStepi dx, const PtrStepi dy, + PtrStepi accum, const int width, const int height, const int minRadius, const int maxRadius, const float idp) + { + const int SHIFT = 10; + const int ONE = 1 << SHIFT; + + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + + if (tid >= count) + return; + + const unsigned int val = list[tid]; + + const int x = (val & 0xFFFF); + const int y = (val >> 16) & 0xFFFF; + + const int vx = dx(y, x); + const int vy = dy(y, x); + + if (vx == 0 && vy == 0) + return; + + const float mag = ::sqrtf(vx * vx + vy * vy); + + const int x0 = __float2int_rn((x * idp) * ONE); + const int y0 = __float2int_rn((y * idp) * ONE); + + int sx = __float2int_rn((vx * idp) * ONE / mag); + int sy = __float2int_rn((vy * idp) * ONE / mag); + + // Step from minRadius to maxRadius in both directions of the gradient + for (int k1 = 0; k1 < 2; ++k1) + { + int x1 = x0 + minRadius * sx; + int y1 = y0 + minRadius * sy; + + for (int r = minRadius; r <= maxRadius; x1 += sx, y1 += sy, ++r) + { + const int x2 = x1 >> SHIFT; + const int y2 = y1 >> SHIFT; + + if (x2 < 0 || x2 >= width || y2 < 0 || y2 >= height) + break; + + ::atomicAdd(accum.ptr(y2 + 1) + x2 + 1, 1); + } + + sx = -sx; + sy = -sy; + } + } + + void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp) + { + const dim3 block(256); + const dim3 grid(divUp(count, block.x)); + + cudaSafeCall( cudaFuncSetCacheConfig(circlesAccumCenters, cudaFuncCachePreferL1) ); + + circlesAccumCenters<<<grid, block>>>(list, count, dx, dy, accum, accum.cols - 2, accum.rows - 2, minRadius, maxRadius, idp); + cudaSafeCall( cudaGetLastError() ); + + cudaSafeCall( cudaDeviceSynchronize() ); + } + + //////////////////////////////////////////////////////////////////////// + // buildCentersList + + __global__ void buildCentersList(const PtrStepSzi 
accum, unsigned int* centers, const int threshold) + { + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x < accum.cols - 2 && y < accum.rows - 2) + { + const int top = accum(y, x + 1); + + const int left = accum(y + 1, x); + const int cur = accum(y + 1, x + 1); + const int right = accum(y + 1, x + 2); + + const int bottom = accum(y + 2, x + 1); + + if (cur > threshold && cur > top && cur >= bottom && cur > left && cur >= right) + { + const unsigned int val = (y << 16) | x; + const int idx = ::atomicAdd(&g_counter, 1); + centers[idx] = val; + } + } + } + + int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold) + { + void* counterPtr; + cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); + + cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); + + const dim3 block(32, 8); + const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y)); + + cudaSafeCall( cudaFuncSetCacheConfig(buildCentersList, cudaFuncCachePreferL1) ); + + buildCentersList<<<grid, block>>>(accum, centers, threshold); + cudaSafeCall( cudaGetLastError() ); + + cudaSafeCall( cudaDeviceSynchronize() ); + + int totalCount; + cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); + + return totalCount; + } + + //////////////////////////////////////////////////////////////////////// + // circlesAccumRadius + + __global__ void circlesAccumRadius(const unsigned int* centers, const unsigned int* list, const int count, + float3* circles, const int maxCircles, const float dp, + const int minRadius, const int maxRadius, const int histSize, const int threshold) + { + int* smem = DynamicSharedMem<int>(); + + for (int i = threadIdx.x; i < histSize + 2; i += blockDim.x) + smem[i] = 0; + __syncthreads(); + + unsigned int val = centers[blockIdx.x]; + + float cx = (val & 0xFFFF); + float cy = (val >> 16) & 0xFFFF; + + cx = (cx + 0.5f) * dp; + cy = (cy + 0.5f) * dp; + + for (int i = threadIdx.x; i < count; i += blockDim.x) + { + val = list[i]; + + const int x = (val & 0xFFFF); + const int y = (val >> 16) & 0xFFFF; + + const float rad = ::sqrtf((cx - x) * (cx - x) + (cy - y) * (cy - y)); + if (rad >= minRadius && rad <= maxRadius) + { + const int r = __float2int_rn(rad - minRadius); + + Emulation::smem::atomicAdd(&smem[r + 1], 1); + } + } + + __syncthreads(); + + for (int i = threadIdx.x; i < histSize; i += blockDim.x) + { + const int curVotes = smem[i + 1]; + + if (curVotes >= threshold && curVotes > smem[i] && curVotes >= smem[i + 2]) + { + const int ind = ::atomicAdd(&g_counter, 1); + if (ind < maxCircles) + circles[ind] = make_float3(cx, cy, i + minRadius); + } + } + } + + int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count, + float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20) + { + void* counterPtr; + cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); + + cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); + + const dim3 block(has20 ? 
1024 : 512); + const dim3 grid(centersCount); + + const int histSize = maxRadius - minRadius + 1; + size_t smemSize = (histSize + 2) * sizeof(int); + + circlesAccumRadius<<<grid, block, smemSize>>>(centers, list, count, circles, maxCircles, dp, minRadius, maxRadius, histSize, threshold); + cudaSafeCall( cudaGetLastError() ); + + cudaSafeCall( cudaDeviceSynchronize() ); + + int totalCount; + cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); + + totalCount = ::min(totalCount, maxCircles); + + return totalCount; + } + } +}}} + + +#endif /* CUDA_DISABLER */ diff --git a/modules/gpuimgproc/src/cuda/hough_lines.cu b/modules/gpuimgproc/src/cuda/hough_lines.cu new file mode 100644 index 000000000..0cee0a43d --- /dev/null +++ b/modules/gpuimgproc/src/cuda/hough_lines.cu @@ -0,0 +1,212 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#if !defined CUDA_DISABLER + +#include <thrust/device_ptr.h> +#include <thrust/sort.h> + +#include "opencv2/core/cuda/common.hpp" +#include "opencv2/core/cuda/emulation.hpp" +#include "opencv2/core/cuda/dynamic_smem.hpp" + +namespace cv { namespace gpu { namespace cudev +{ + namespace hough_lines + { + __device__ int g_counter; + + //////////////////////////////////////////////////////////////////////// + // linesAccum + + __global__ void linesAccumGlobal(const unsigned int* list, const int count, PtrStepi accum, const float irho, const float theta, const int numrho) + { + const int n = blockIdx.x; + const float ang = n * theta; + + float sinVal; + float cosVal; + sincosf(ang, &sinVal, &cosVal); + sinVal *= irho; + cosVal *= irho; + + const int shift = (numrho - 1) / 2; + + int* accumRow = accum.ptr(n + 1); + for (int i = threadIdx.x; i < count; i += blockDim.x) + { + const unsigned int val = list[i]; + + const int x = (val & 0xFFFF); + const int y = (val >> 16) & 0xFFFF; + + int r = __float2int_rn(x * cosVal + y * sinVal); + r += shift; + + ::atomicAdd(accumRow + r + 1, 1); + } + } + + __global__ void linesAccumShared(const unsigned int* list, const int count, PtrStepi accum, const float irho, const float theta, const int numrho) + { + int* smem = DynamicSharedMem<int>(); + + for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x) + smem[i] = 0; + + __syncthreads(); + + const int n = blockIdx.x; + const float ang = n * theta; + + float sinVal; + float cosVal; + sincosf(ang, &sinVal, &cosVal); + sinVal *= irho; + cosVal *= irho; + + const int shift = (numrho - 1) / 2; + + for (int i = threadIdx.x; i < count; i += blockDim.x) + { + const unsigned int val = list[i]; + + const int x = (val & 0xFFFF); + const int y = (val >> 16) & 0xFFFF; + + int r = __float2int_rn(x * cosVal + y * sinVal); + r += shift; + + Emulation::smem::atomicAdd(&smem[r + 1], 1); + } + + __syncthreads(); + + int* accumRow = accum.ptr(n + 1); + for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x) + accumRow[i] = smem[i]; + } + + void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20) + { + const dim3 block(has20 ? 
1024 : 512); + const dim3 grid(accum.rows - 2); + + size_t smemSize = (accum.cols - 1) * sizeof(int); + + if (smemSize < sharedMemPerBlock - 1000) + linesAccumShared<<<grid, block, smemSize>>>(list, count, accum, 1.0f / rho, theta, accum.cols - 2); + else + linesAccumGlobal<<<grid, block>>>(list, count, accum, 1.0f / rho, theta, accum.cols - 2); + + cudaSafeCall( cudaGetLastError() ); + + cudaSafeCall( cudaDeviceSynchronize() ); + } + + //////////////////////////////////////////////////////////////////////// + // linesGetResult + + __global__ void linesGetResult(const PtrStepSzi accum, float2* out, int* votes, const int maxSize, const float rho, const float theta, const int threshold, const int numrho) + { + const int r = blockIdx.x * blockDim.x + threadIdx.x; + const int n = blockIdx.y * blockDim.y + threadIdx.y; + + if (r >= accum.cols - 2 || n >= accum.rows - 2) + return; + + const int curVotes = accum(n + 1, r + 1); + + if (curVotes > threshold && + curVotes > accum(n + 1, r) && + curVotes >= accum(n + 1, r + 2) && + curVotes > accum(n, r + 1) && + curVotes >= accum(n + 2, r + 1)) + { + const float radius = (r - (numrho - 1) * 0.5f) * rho; + const float angle = n * theta; + + const int ind = ::atomicAdd(&g_counter, 1); + if (ind < maxSize) + { + out[ind] = make_float2(radius, angle); + votes[ind] = curVotes; + } + } + } + + int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort) + { + void* counterPtr; + cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); + + cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); + + const dim3 block(32, 8); + const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y)); + + cudaSafeCall( cudaFuncSetCacheConfig(linesGetResult, cudaFuncCachePreferL1) ); + + linesGetResult<<<grid, block>>>(accum, out, votes, maxSize, rho, theta, threshold, accum.cols - 2); + cudaSafeCall( cudaGetLastError() ); + + cudaSafeCall( cudaDeviceSynchronize() ); + + int totalCount; + cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); + + totalCount = ::min(totalCount, maxSize); + + if (doSort && totalCount > 0) + { + thrust::device_ptr<float2> outPtr(out); + thrust::device_ptr<int> votesPtr(votes); + thrust::sort_by_key(votesPtr, votesPtr + totalCount, outPtr, thrust::greater<int>()); + } + + return totalCount; + } + } +}}} + + +#endif /* CUDA_DISABLER */ diff --git a/modules/gpuimgproc/src/cuda/hough_segments.cu b/modules/gpuimgproc/src/cuda/hough_segments.cu new file mode 100644 index 000000000..e420449fa --- /dev/null +++ b/modules/gpuimgproc/src/cuda/hough_segments.cu @@ -0,0 +1,249 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#if !defined CUDA_DISABLER + +#include "opencv2/core/cuda/common.hpp" +#include "opencv2/core/cuda/vec_math.hpp" + +namespace cv { namespace gpu { namespace cudev +{ + namespace hough_segments + { + __device__ int g_counter; + + texture tex_mask(false, cudaFilterModePoint, cudaAddressModeClamp); + + __global__ void houghLinesProbabilistic(const PtrStepSzi accum, + int4* out, const int maxSize, + const float rho, const float theta, + const int lineGap, const int lineLength, + const int rows, const int cols) + { + const int r = blockIdx.x * blockDim.x + threadIdx.x; + const int n = blockIdx.y * blockDim.y + threadIdx.y; + + if (r >= accum.cols - 2 || n >= accum.rows - 2) + return; + + const int curVotes = accum(n + 1, r + 1); + + if (curVotes >= lineLength && + curVotes > accum(n, r) && + curVotes > accum(n, r + 1) && + curVotes > accum(n, r + 2) && + curVotes > accum(n + 1, r) && + curVotes > accum(n + 1, r + 2) && + curVotes > accum(n + 2, r) && + curVotes > accum(n + 2, r + 1) && + curVotes > accum(n + 2, r + 2)) + { + const float radius = (r - (accum.cols - 2 - 1) * 0.5f) * rho; + const float angle = n * theta; + + float cosa; + float sina; + sincosf(angle, &sina, &cosa); + + float2 p0 = make_float2(cosa * radius, sina * radius); + float2 dir = make_float2(-sina, cosa); + + float2 pb[4] = {make_float2(-1, -1), make_float2(-1, -1), make_float2(-1, -1), make_float2(-1, -1)}; + float a; + + if (dir.x != 0) + { + a = -p0.x / dir.x; + pb[0].x = 0; + pb[0].y = p0.y + a * dir.y; + + a = (cols - 1 - p0.x) / dir.x; + pb[1].x = cols - 1; + pb[1].y = p0.y + a * dir.y; + } + if (dir.y != 0) + { + a = -p0.y / dir.y; + pb[2].x = p0.x + a * dir.x; + pb[2].y = 0; + + a = (rows - 1 - p0.y) / dir.y; + pb[3].x = p0.x + a * dir.x; + pb[3].y = rows - 1; + } + + if (pb[0].x == 0 && (pb[0].y >= 0 && pb[0].y < rows)) + { + p0 = pb[0]; + if (dir.x < 0) + dir = -dir; + } + else if (pb[1].x == cols - 1 && (pb[0].y >= 0 && pb[0].y < rows)) + { + p0 = pb[1]; + if (dir.x > 0) + dir = -dir; + } + else if (pb[2].y == 0 && (pb[2].x >= 0 && pb[2].x < cols)) + { + p0 = pb[2]; + if (dir.y < 0) + dir = -dir; + } + else if (pb[3].y == rows - 1 && (pb[3].x >= 0 && pb[3].x < cols)) + { + p0 = pb[3]; + if (dir.y > 0) + dir = -dir; + } + + float2 d; + if (::fabsf(dir.x) > ::fabsf(dir.y)) + { + d.x = dir.x > 0 ? 
1 : -1; + d.y = dir.y / ::fabsf(dir.x); + } + else + { + d.x = dir.x / ::fabsf(dir.y); + d.y = dir.y > 0 ? 1 : -1; + } + + float2 line_end[2]; + int gap; + bool inLine = false; + + float2 p1 = p0; + if (p1.x < 0 || p1.x >= cols || p1.y < 0 || p1.y >= rows) + return; + + for (;;) + { + if (tex2D(tex_mask, p1.x, p1.y)) + { + gap = 0; + + if (!inLine) + { + line_end[0] = p1; + line_end[1] = p1; + inLine = true; + } + else + { + line_end[1] = p1; + } + } + else if (inLine) + { + if (++gap > lineGap) + { + bool good_line = ::abs(line_end[1].x - line_end[0].x) >= lineLength || + ::abs(line_end[1].y - line_end[0].y) >= lineLength; + + if (good_line) + { + const int ind = ::atomicAdd(&g_counter, 1); + if (ind < maxSize) + out[ind] = make_int4(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y); + } + + gap = 0; + inLine = false; + } + } + + p1 = p1 + d; + if (p1.x < 0 || p1.x >= cols || p1.y < 0 || p1.y >= rows) + { + if (inLine) + { + bool good_line = ::abs(line_end[1].x - line_end[0].x) >= lineLength || + ::abs(line_end[1].y - line_end[0].y) >= lineLength; + + if (good_line) + { + const int ind = ::atomicAdd(&g_counter, 1); + if (ind < maxSize) + out[ind] = make_int4(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y); + } + + } + break; + } + } + } + } + + int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength) + { + void* counterPtr; + cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); + + cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); + + const dim3 block(32, 8); + const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y)); + + bindTexture(&tex_mask, mask); + + houghLinesProbabilistic<<>>(accum, + out, maxSize, + rho, theta, + lineGap, lineLength, + mask.rows, mask.cols); + cudaSafeCall( cudaGetLastError() ); + + cudaSafeCall( cudaDeviceSynchronize() ); + + int totalCount; + cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); + + totalCount = ::min(totalCount, maxSize); + + return totalCount; + } + } +}}} + + +#endif /* CUDA_DISABLER */ diff --git a/modules/gpuimgproc/src/hough.cpp b/modules/gpuimgproc/src/generalized_hough.cpp similarity index 58% rename from modules/gpuimgproc/src/hough.cpp rename to modules/gpuimgproc/src/generalized_hough.cpp index 90b0261bd..1bc0574b1 100644 --- a/modules/gpuimgproc/src/hough.cpp +++ b/modules/gpuimgproc/src/generalized_hough.cpp @@ -45,539 +45,15 @@ using namespace cv; using namespace cv::gpu; -#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) - -Ptr cv::gpu::createHoughLinesDetector(float, float, int, bool, int) { throw_no_cuda(); return Ptr(); } - -Ptr cv::gpu::createHoughSegmentDetector(float, float, int, int, int) { throw_no_cuda(); return Ptr(); } - -Ptr cv::gpu::createHoughCirclesDetector(float, float, int, int, int, int, int) { throw_no_cuda(); return Ptr(); } +#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) || !defined(HAVE_OPENCV_GPUARITHM) Ptr cv::gpu::GeneralizedHough::create(int) { throw_no_cuda(); return Ptr(); } #else /* !defined (HAVE_CUDA) */ -#include "opencv2/core/utility.hpp" - namespace cv { namespace gpu { namespace cudev { - namespace hough - { - int buildPointList_gpu(PtrStepSzb src, unsigned int* list); - } -}}} - -////////////////////////////////////////////////////////// -// HoughLinesDetector - -namespace cv { namespace gpu { namespace cudev -{ - namespace hough - { - void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, 
float rho, float theta, size_t sharedMemPerBlock, bool has20); - int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort); - } -}}} - -namespace -{ - class HoughLinesDetectorImpl : public HoughLinesDetector - { - public: - HoughLinesDetectorImpl(float rho, float theta, int threshold, bool doSort, int maxLines) : - rho_(rho), theta_(theta), threshold_(threshold), doSort_(doSort), maxLines_(maxLines) - { - } - - void detect(InputArray src, OutputArray lines); - void downloadResults(InputArray d_lines, OutputArray h_lines, OutputArray h_votes = noArray()); - - void setRho(float rho) { rho_ = rho; } - float getRho() const { return rho_; } - - void setTheta(float theta) { theta_ = theta; } - float getTheta() const { return theta_; } - - void setThreshold(int threshold) { threshold_ = threshold; } - int getThreshold() const { return threshold_; } - - void setDoSort(bool doSort) { doSort_ = doSort; } - bool getDoSort() const { return doSort_; } - - void setMaxLines(int maxLines) { maxLines_ = maxLines; } - int getMaxLines() const { return maxLines_; } - - void write(FileStorage& fs) const - { - fs << "name" << "HoughLinesDetector_GPU" - << "rho" << rho_ - << "theta" << theta_ - << "threshold" << threshold_ - << "doSort" << doSort_ - << "maxLines" << maxLines_; - } - - void read(const FileNode& fn) - { - CV_Assert( String(fn["name"]) == "HoughLinesDetector_GPU" ); - rho_ = (float)fn["rho"]; - theta_ = (float)fn["theta"]; - threshold_ = (int)fn["threshold"]; - doSort_ = (int)fn["doSort"] != 0; - maxLines_ = (int)fn["maxLines"]; - } - - private: - float rho_; - float theta_; - int threshold_; - bool doSort_; - int maxLines_; - - GpuMat accum_; - GpuMat list_; - GpuMat result_; - }; - - void HoughLinesDetectorImpl::detect(InputArray _src, OutputArray lines) - { - using namespace cv::gpu::cudev::hough; - - GpuMat src = _src.getGpuMat(); - - CV_Assert( src.type() == CV_8UC1 ); - CV_Assert( src.cols < std::numeric_limits::max() ); - CV_Assert( src.rows < std::numeric_limits::max() ); - - ensureSizeIsEnough(1, src.size().area(), CV_32SC1, list_); - unsigned int* srcPoints = list_.ptr(); - - const int pointsCount = buildPointList_gpu(src, srcPoints); - if (pointsCount == 0) - { - lines.release(); - return; - } - - const int numangle = cvRound(CV_PI / theta_); - const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho_); - CV_Assert( numangle > 0 && numrho > 0 ); - - ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, accum_); - accum_.setTo(Scalar::all(0)); - - DeviceInfo devInfo; - linesAccum_gpu(srcPoints, pointsCount, accum_, rho_, theta_, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20)); - - ensureSizeIsEnough(2, maxLines_, CV_32FC2, result_); - - int linesCount = linesGetResult_gpu(accum_, result_.ptr(0), result_.ptr(1), maxLines_, rho_, theta_, threshold_, doSort_); - - if (linesCount == 0) - { - lines.release(); - return; - } - - result_.cols = linesCount; - result_.copyTo(lines); - } - - void HoughLinesDetectorImpl::downloadResults(InputArray _d_lines, OutputArray h_lines, OutputArray h_votes) - { - GpuMat d_lines = _d_lines.getGpuMat(); - - if (d_lines.empty()) - { - h_lines.release(); - if (h_votes.needed()) - h_votes.release(); - return; - } - - CV_Assert( d_lines.rows == 2 && d_lines.type() == CV_32FC2 ); - - d_lines.row(0).download(h_lines); - - if (h_votes.needed()) - { - GpuMat d_votes(1, d_lines.cols, CV_32SC1, d_lines.ptr(1)); - d_votes.download(h_votes); - } - } -} - -Ptr 
cv::gpu::createHoughLinesDetector(float rho, float theta, int threshold, bool doSort, int maxLines) -{ - return new HoughLinesDetectorImpl(rho, theta, threshold, doSort, maxLines); -} - -////////////////////////////////////////////////////////// -// HoughLinesP - -namespace cv { namespace gpu { namespace cudev -{ - namespace hough - { - int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength); - } -}}} - -namespace -{ - class PHoughLinesDetectorImpl : public HoughSegmentDetector - { - public: - PHoughLinesDetectorImpl(float rho, float theta, int minLineLength, int maxLineGap, int maxLines) : - rho_(rho), theta_(theta), minLineLength_(minLineLength), maxLineGap_(maxLineGap), maxLines_(maxLines) - { - } - - void detect(InputArray src, OutputArray lines); - - void setRho(float rho) { rho_ = rho; } - float getRho() const { return rho_; } - - void setTheta(float theta) { theta_ = theta; } - float getTheta() const { return theta_; } - - void setMinLineLength(int minLineLength) { minLineLength_ = minLineLength; } - int getMinLineLength() const { return minLineLength_; } - - void setMaxLineGap(int maxLineGap) { maxLineGap_ = maxLineGap; } - int getMaxLineGap() const { return maxLineGap_; } - - void setMaxLines(int maxLines) { maxLines_ = maxLines; } - int getMaxLines() const { return maxLines_; } - - void write(FileStorage& fs) const - { - fs << "name" << "PHoughLinesDetector_GPU" - << "rho" << rho_ - << "theta" << theta_ - << "minLineLength" << minLineLength_ - << "maxLineGap" << maxLineGap_ - << "maxLines" << maxLines_; - } - - void read(const FileNode& fn) - { - CV_Assert( String(fn["name"]) == "PHoughLinesDetector_GPU" ); - rho_ = (float)fn["rho"]; - theta_ = (float)fn["theta"]; - minLineLength_ = (int)fn["minLineLength"]; - maxLineGap_ = (int)fn["maxLineGap"]; - maxLines_ = (int)fn["maxLines"]; - } - - private: - float rho_; - float theta_; - int minLineLength_; - int maxLineGap_; - int maxLines_; - - GpuMat accum_; - GpuMat list_; - GpuMat result_; - }; - - void PHoughLinesDetectorImpl::detect(InputArray _src, OutputArray lines) - { - using namespace cv::gpu::cudev::hough; - - GpuMat src = _src.getGpuMat(); - - CV_Assert( src.type() == CV_8UC1 ); - CV_Assert( src.cols < std::numeric_limits::max() ); - CV_Assert( src.rows < std::numeric_limits::max() ); - - ensureSizeIsEnough(1, src.size().area(), CV_32SC1, list_); - unsigned int* srcPoints = list_.ptr(); - - const int pointsCount = buildPointList_gpu(src, srcPoints); - if (pointsCount == 0) - { - lines.release(); - return; - } - - const int numangle = cvRound(CV_PI / theta_); - const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho_); - CV_Assert( numangle > 0 && numrho > 0 ); - - ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, accum_); - accum_.setTo(Scalar::all(0)); - - DeviceInfo devInfo; - linesAccum_gpu(srcPoints, pointsCount, accum_, rho_, theta_, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20)); - - ensureSizeIsEnough(1, maxLines_, CV_32SC4, result_); - - int linesCount = houghLinesProbabilistic_gpu(src, accum_, result_.ptr(), maxLines_, rho_, theta_, maxLineGap_, minLineLength_); - - if (linesCount == 0) - { - lines.release(); - return; - } - - result_.cols = linesCount; - result_.copyTo(lines); - } -} - -Ptr cv::gpu::createHoughSegmentDetector(float rho, float theta, int minLineLength, int maxLineGap, int maxLines) -{ - return new PHoughLinesDetectorImpl(rho, theta, minLineLength, maxLineGap, maxLines); -} - 
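// [editorial usage sketch, not part of the patch] How the segment detector implemented
// above is meant to be driven from host code; the parameter values are illustrative only.
//
//     static void houghSegmentsExample(const cv::gpu::GpuMat& d_edges) // CV_8UC1 edge map
//     {
//         cv::Ptr<cv::gpu::HoughSegmentDetector> hough =
//             cv::gpu::createHoughSegmentDetector(1.0f, (float)(CV_PI / 180.0),
//                                                 50 /* minLineLength */, 5 /* maxLineGap */,
//                                                 4096 /* maxLines */);
//         cv::gpu::GpuMat d_lines;
//         hough->detect(d_edges, d_lines); // 1 x N row of CV_32SC4 segments (x1, y1, x2, y2)
//     }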
-//////////////////////////////////////////////////////////
-// HoughCircles
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace hough
-    {
-        void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp);
-        int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold);
-        int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count,
-                                   float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20);
-    }
-}}}
-
-namespace
-{
-    class HoughCirclesDetectorImpl : public HoughCirclesDetector
-    {
-    public:
-        HoughCirclesDetectorImpl(float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles);
-
-        void detect(InputArray src, OutputArray circles);
-
-        void setDp(float dp) { dp_ = dp; }
-        float getDp() const { return dp_; }
-
-        void setMinDist(float minDist) { minDist_ = minDist; }
-        float getMinDist() const { return minDist_; }
-
-        void setCannyThreshold(int cannyThreshold) { cannyThreshold_ = cannyThreshold; }
-        int getCannyThreshold() const { return cannyThreshold_; }
-
-        void setVotesThreshold(int votesThreshold) { votesThreshold_ = votesThreshold; }
-        int getVotesThreshold() const { return votesThreshold_; }
-
-        void setMinRadius(int minRadius) { minRadius_ = minRadius; }
-        int getMinRadius() const { return minRadius_; }
-
-        void setMaxRadius(int maxRadius) { maxRadius_ = maxRadius; }
-        int getMaxRadius() const { return maxRadius_; }
-
-        void setMaxCircles(int maxCircles) { maxCircles_ = maxCircles; }
-        int getMaxCircles() const { return maxCircles_; }
-
-        void write(FileStorage& fs) const
-        {
-            fs << "name" << "HoughCirclesDetector_GPU"
-               << "dp" << dp_
-               << "minDist" << minDist_
-               << "cannyThreshold" << cannyThreshold_
-               << "votesThreshold" << votesThreshold_
-               << "minRadius" << minRadius_
-               << "maxRadius" << maxRadius_
-               << "maxCircles" << maxCircles_;
-        }
-
-        void read(const FileNode& fn)
-        {
-            CV_Assert( String(fn["name"]) == "HoughCirclesDetector_GPU" );
-            dp_ = (float)fn["dp"];
-            minDist_ = (float)fn["minDist"];
-            cannyThreshold_ = (int)fn["cannyThreshold"];
-            votesThreshold_ = (int)fn["votesThreshold"];
-            minRadius_ = (int)fn["minRadius"];
-            maxRadius_ = (int)fn["maxRadius"];
-            maxCircles_ = (int)fn["maxCircles"];
-        }
-
-    private:
-        float dp_;
-        float minDist_;
-        int cannyThreshold_;
-        int votesThreshold_;
-        int minRadius_;
-        int maxRadius_;
-        int maxCircles_;
-
-        GpuMat dx_, dy_;
-        GpuMat edges_;
-        GpuMat accum_;
-        GpuMat list_;
-        GpuMat result_;
-        Ptr<Filter> filterDx_;
-        Ptr<Filter> filterDy_;
-        Ptr<CannyEdgeDetector> canny_;
-    };
-
-    HoughCirclesDetectorImpl::HoughCirclesDetectorImpl(float dp, float minDist, int cannyThreshold, int votesThreshold,
-                                                       int minRadius, int maxRadius, int maxCircles) :
-        dp_(dp), minDist_(minDist), cannyThreshold_(cannyThreshold), votesThreshold_(votesThreshold),
-        minRadius_(minRadius), maxRadius_(maxRadius), maxCircles_(maxCircles)
-    {
-        canny_ = gpu::createCannyEdgeDetector(std::max(cannyThreshold_ / 2, 1), cannyThreshold_);
-
-        filterDx_ = gpu::createSobelFilter(CV_8UC1, CV_32S, 1, 0);
-        filterDy_ = gpu::createSobelFilter(CV_8UC1, CV_32S, 0, 1);
-    }
-
-    void HoughCirclesDetectorImpl::detect(InputArray _src, OutputArray circles)
-    {
-        using namespace cv::gpu::cudev::hough;
-
-        GpuMat src = _src.getGpuMat();
-
-        CV_Assert( src.type() == CV_8UC1 );
-        CV_Assert( src.cols < std::numeric_limits<unsigned short>::max() );
-        CV_Assert( src.rows < std::numeric_limits<unsigned short>::max() );
-        CV_Assert( dp_ > 0 );
-        CV_Assert( minRadius_ > 0 && maxRadius_ > minRadius_ );
-        CV_Assert( cannyThreshold_ > 0 );
-        CV_Assert( votesThreshold_ > 0 );
-        CV_Assert( maxCircles_ > 0 );
-
-        const float idp = 1.0f / dp_;
-
-        filterDx_->apply(src, dx_);
-        filterDy_->apply(src, dy_);
-
-        canny_->setLowThreshold(std::max(cannyThreshold_ / 2, 1));
-        canny_->setHighThreshold(cannyThreshold_);
-
-        canny_->detect(dx_, dy_, edges_);
-
-        ensureSizeIsEnough(2, src.size().area(), CV_32SC1, list_);
-        unsigned int* srcPoints = list_.ptr<unsigned int>(0);
-        unsigned int* centers = list_.ptr<unsigned int>(1);
-
-        const int pointsCount = buildPointList_gpu(edges_, srcPoints);
-        if (pointsCount == 0)
-        {
-            circles.release();
-            return;
-        }
-
-        ensureSizeIsEnough(cvCeil(src.rows * idp) + 2, cvCeil(src.cols * idp) + 2, CV_32SC1, accum_);
-        accum_.setTo(Scalar::all(0));
-
-        circlesAccumCenters_gpu(srcPoints, pointsCount, dx_, dy_, accum_, minRadius_, maxRadius_, idp);
-
-        int centersCount = buildCentersList_gpu(accum_, centers, votesThreshold_);
-        if (centersCount == 0)
-        {
-            circles.release();
-            return;
-        }
-
-        if (minDist_ > 1)
-        {
-            AutoBuffer<ushort2> oldBuf_(centersCount);
-            AutoBuffer<ushort2> newBuf_(centersCount);
-            int newCount = 0;
-
-            ushort2* oldBuf = oldBuf_;
-            ushort2* newBuf = newBuf_;
-
-            cudaSafeCall( cudaMemcpy(oldBuf, centers, centersCount * sizeof(ushort2), cudaMemcpyDeviceToHost) );
-
-            const int cellSize = cvRound(minDist_);
-            const int gridWidth = (src.cols + cellSize - 1) / cellSize;
-            const int gridHeight = (src.rows + cellSize - 1) / cellSize;
-
-            std::vector< std::vector<ushort2> > grid(gridWidth * gridHeight);
-
-            const float minDist2 = minDist_ * minDist_;
-
-            for (int i = 0; i < centersCount; ++i)
-            {
-                ushort2 p = oldBuf[i];
-
-                bool good = true;
-
-                int xCell = static_cast<int>(p.x / cellSize);
-                int yCell = static_cast<int>(p.y / cellSize);
-
-                int x1 = xCell - 1;
-                int y1 = yCell - 1;
-                int x2 = xCell + 1;
-                int y2 = yCell + 1;
-
-                // boundary check
-                x1 = std::max(0, x1);
-                y1 = std::max(0, y1);
-                x2 = std::min(gridWidth - 1, x2);
-                y2 = std::min(gridHeight - 1, y2);
-
-                for (int yy = y1; yy <= y2; ++yy)
-                {
-                    for (int xx = x1; xx <= x2; ++xx)
-                    {
-                        std::vector<ushort2>& m = grid[yy * gridWidth + xx];
-
-                        for(size_t j = 0; j < m.size(); ++j)
-                        {
-                            float dx = (float)(p.x - m[j].x);
-                            float dy = (float)(p.y - m[j].y);
-
-                            if (dx * dx + dy * dy < minDist2)
-                            {
-                                good = false;
-                                goto break_out;
-                            }
-                        }
-                    }
-                }
-
-                break_out:
-
-                if(good)
-                {
-                    grid[yCell * gridWidth + xCell].push_back(p);
-
-                    newBuf[newCount++] = p;
-                }
-            }
-
-            cudaSafeCall( cudaMemcpy(centers, newBuf, newCount * sizeof(unsigned int), cudaMemcpyHostToDevice) );
-            centersCount = newCount;
-        }
-
-        ensureSizeIsEnough(1, maxCircles_, CV_32FC3, result_);
-
-        int circlesCount = circlesAccumRadius_gpu(centers, centersCount, srcPoints, pointsCount, result_.ptr<float3>(), maxCircles_,
-                                                  dp_, minRadius_, maxRadius_, votesThreshold_, deviceSupports(FEATURE_SET_COMPUTE_20));
-
-        if (circlesCount == 0)
-        {
-            circles.release();
-            return;
-        }
-
-        result_.cols = circlesCount;
-        result_.copyTo(circles);
-    }
-}
-
-Ptr<HoughCirclesDetector> cv::gpu::createHoughCirclesDetector(float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles)
-{
-    return new HoughCirclesDetectorImpl(dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius, maxCircles);
-}
-
-//////////////////////////////////////////////////////////
-// GeneralizedHough
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace hough
+    namespace ght
     {
         template <typename T>
         int buildEdgePointList_gpu(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList);
@@ -585,52 +61,52 @@ namespace cv { namespace gpu { namespace cudev
                             PtrStepSz<short2> r_table, int* r_sizes,
                             short2 templCenter, int levels);
 
-        void GHT_Ballard_Pos_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
-                                          PtrStepSz<short2> r_table, const int* r_sizes,
-                                          PtrStepSzi hist,
-                                          float dp, int levels);
-        int GHT_Ballard_Pos_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int maxSize, float dp, int threshold);
+        void Ballard_Pos_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
+                                      PtrStepSz<short2> r_table, const int* r_sizes,
+                                      PtrStepSzi hist,
+                                      float dp, int levels);
+        int Ballard_Pos_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int maxSize, float dp, int threshold);
 
-        void GHT_Ballard_PosScale_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
-                                               PtrStepSz<short2> r_table, const int* r_sizes,
-                                               PtrStepi hist, int rows, int cols,
-                                               float minScale, float scaleStep, int scaleRange,
-                                               float dp, int levels);
-        int GHT_Ballard_PosScale_findPosInHist_gpu(PtrStepi hist, int rows, int cols, int scaleRange, float4* out, int3* votes, int maxSize,
-                                                   float minScale, float scaleStep, float dp, int threshold);
+        void Ballard_PosScale_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
+                                           PtrStepSz<short2> r_table, const int* r_sizes,
+                                           PtrStepi hist, int rows, int cols,
+                                           float minScale, float scaleStep, int scaleRange,
+                                           float dp, int levels);
+        int Ballard_PosScale_findPosInHist_gpu(PtrStepi hist, int rows, int cols, int scaleRange, float4* out, int3* votes, int maxSize,
+                                               float minScale, float scaleStep, float dp, int threshold);
 
-        void GHT_Ballard_PosRotation_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
-                                                  PtrStepSz<short2> r_table, const int* r_sizes,
-                                                  PtrStepi hist, int rows, int cols,
-                                                  float minAngle, float angleStep, int angleRange,
-                                                  float dp, int levels);
-        int GHT_Ballard_PosRotation_findPosInHist_gpu(PtrStepi hist, int rows, int cols, int angleRange, float4* out, int3* votes, int maxSize,
-                                                      float minAngle, float angleStep, float dp, int threshold);
+        void Ballard_PosRotation_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
+                                              PtrStepSz<short2> r_table, const int* r_sizes,
+                                              PtrStepi hist, int rows, int cols,
+                                              float minAngle, float angleStep, int angleRange,
+                                              float dp, int levels);
+        int Ballard_PosRotation_findPosInHist_gpu(PtrStepi hist, int rows, int cols, int angleRange, float4* out, int3* votes, int maxSize,
+                                                  float minAngle, float angleStep, float dp, int threshold);
 
-        void GHT_Guil_Full_setTemplFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2);
-        void GHT_Guil_Full_setImageFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2);
-        void GHT_Guil_Full_buildTemplFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
-                                                     int* sizes, int maxSize,
-                                                     float xi, float angleEpsilon, int levels,
-                                                     float2 center, float maxDist);
-        void GHT_Guil_Full_buildImageFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
-                                                     int* sizes, int maxSize,
-                                                     float xi, float angleEpsilon, int levels,
-                                                     float2 center, float maxDist);
-        void GHT_Guil_Full_calcOHist_gpu(const int* templSizes, const int* imageSizes, int* OHist,
-                                         float minAngle, float maxAngle, float angleStep, int angleRange,
-                                         int levels, int tMaxSize);
-        void GHT_Guil_Full_calcSHist_gpu(const int* templSizes, const int* imageSizes, int* SHist,
-                                         float angle, float angleEpsilon,
-                                         float minScale, float maxScale, float iScaleStep, int scaleRange,
-                                         int levels, int tMaxSize);
-        void GHT_Guil_Full_calcPHist_gpu(const int* templSizes, const int* imageSizes, PtrStepSzi PHist,
-                                         float angle, float angleEpsilon, float scale,
-                                         float dp,
-                                         int levels, int tMaxSize);
-        int GHT_Guil_Full_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int curSize, int maxSize,
-                                            float angle, int angleVotes, float scale, int scaleVotes,
-                                            float dp, int threshold);
+        void Guil_Full_setTemplFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2);
+        void Guil_Full_setImageFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2);
+        void Guil_Full_buildTemplFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
+                                                 int* sizes, int maxSize,
+                                                 float xi, float angleEpsilon, int levels,
+                                                 float2 center, float maxDist);
+        void Guil_Full_buildImageFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
+                                                 int* sizes, int maxSize,
+                                                 float xi, float angleEpsilon, int levels,
+                                                 float2 center, float maxDist);
+        void Guil_Full_calcOHist_gpu(const int* templSizes, const int* imageSizes, int* OHist,
+                                     float minAngle, float maxAngle, float angleStep, int angleRange,
+                                     int levels, int tMaxSize);
+        void Guil_Full_calcSHist_gpu(const int* templSizes, const int* imageSizes, int* SHist,
+                                     float angle, float angleEpsilon,
+                                     float minScale, float maxScale, float iScaleStep, int scaleRange,
+                                     int levels, int tMaxSize);
+        void Guil_Full_calcPHist_gpu(const int* templSizes, const int* imageSizes, PtrStepSzi PHist,
+                                     float angle, float angleEpsilon, float scale,
+                                     float dp,
+                                     int levels, int tMaxSize);
+        int Guil_Full_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int curSize, int maxSize,
+                                        float angle, int angleVotes, float scale, int scaleVotes,
+                                        float dp, int threshold);
     }
 }}}
 
@@ -889,7 +365,7 @@ namespace
     void GHT_Pos::buildEdgePointList(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy)
     {
-        using namespace cv::gpu::cudev::hough;
+        using namespace cv::gpu::cudev::ght;
 
         typedef int (*func_t)(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList);
         static const func_t funcs[] =
@@ -1077,7 +553,7 @@ namespace
     void GHT_Ballard_Pos::processTempl()
     {
-        using namespace cv::gpu::cudev::hough;
+        using namespace cv::gpu::cudev::ght;
 
         CV_Assert(levels > 0);
@@ -1103,7 +579,7 @@ namespace
     void GHT_Ballard_Pos::calcHist()
     {
-        using namespace cv::gpu::cudev::hough;
+        using namespace cv::gpu::cudev::ght;
 
         CV_Assert(levels > 0 && r_table.rows == (levels + 1) && r_sizes.cols == (levels + 1));
         CV_Assert(dp > 0.0);
@@ -1117,22 +593,22 @@ namespace
 
         if (edgePointList.cols > 0)
         {
-            GHT_Ballard_Pos_calcHist_gpu(edgePointList.ptr<unsigned int>(0), edgePointList.ptr<float>(1), edgePointList.cols,
-                                         r_table, r_sizes.ptr<int>(),
-                                         hist,
-                                         (float)dp, levels);
+            Ballard_Pos_calcHist_gpu(edgePointList.ptr<unsigned int>(0), edgePointList.ptr<float>(1), edgePointList.cols,
+                                     r_table, r_sizes.ptr<int>(),
+                                     hist,
+                                     (float)dp, levels);
         }
     }
 
     void GHT_Ballard_Pos::findPosInHist()
     {
-        using namespace cv::gpu::cudev::hough;
+        using namespace cv::gpu::cudev::ght;
 
         CV_Assert(votesThreshold > 0);
 
         ensureSizeIsEnough(2, maxSize, CV_32FC4, outBuf);
 
-        posCount = GHT_Ballard_Pos_findPosInHist_gpu(hist, outBuf.ptr<float4>(0), outBuf.ptr<int3>(1), maxSize, (float)dp, votesThreshold);
+        posCount = Ballard_Pos_findPosInHist_gpu(hist, outBuf.ptr<float4>(0), outBuf.ptr<int3>(1), maxSize, (float)dp, votesThreshold);
     }
 
     /////////////////////////////////////
@@ -1181,7 +657,7 @@ namespace
     void GHT_Ballard_PosScale::calcHist()
     {
-        using namespace cv::gpu::cudev::hough;
+        using namespace cv::gpu::cudev::ght;
 
         CV_Assert(levels > 0 && r_table.rows == (levels + 1) && r_sizes.cols == (levels + 1));
         CV_Assert(dp > 0.0);
@@ -1200,16 +676,16 @@ namespace
 
         if (edgePointList.cols > 0)
         {
-            GHT_Ballard_PosScale_calcHist_gpu(edgePointList.ptr<unsigned int>(0), edgePointList.ptr<float>(1), edgePointList.cols,
-                                              r_table, r_sizes.ptr<int>(),
-                                              hist, rows, cols,
-                                              (float)minScale, (float)scaleStep, scaleRange, (float)dp, levels);
+            Ballard_PosScale_calcHist_gpu(edgePointList.ptr<unsigned int>(0), edgePointList.ptr<float>(1), edgePointList.cols,
+                                          r_table, r_sizes.ptr<int>(),
+                                          hist, rows, cols,
+                                          (float)minScale, (float)scaleStep, scaleRange, (float)dp, levels);
         }
     }
 
     void GHT_Ballard_PosScale::findPosInHist()
     {
-        using namespace cv::gpu::cudev::hough;
+        using namespace cv::gpu::cudev::ght;
 
         CV_Assert(votesThreshold > 0);
@@ -1220,7 +696,7 @@ namespace
 
         ensureSizeIsEnough(2, maxSize, CV_32FC4, outBuf);
 
-        posCount = GHT_Ballard_PosScale_findPosInHist_gpu(hist, rows, cols, scaleRange, outBuf.ptr<float4>(0), outBuf.ptr<int3>(1), maxSize, (float)minScale, (float)scaleStep, (float)dp, votesThreshold);
+        posCount = Ballard_PosScale_findPosInHist_gpu(hist, rows, cols, scaleRange, outBuf.ptr<float4>(0), outBuf.ptr<int3>(1), maxSize, (float)minScale, (float)scaleStep, (float)dp, votesThreshold);
     }
 
     /////////////////////////////////////
@@ -1269,7 +745,7 @@ namespace
     void GHT_Ballard_PosRotation::calcHist()
     {
-        using namespace cv::gpu::cudev::hough;
+        using namespace cv::gpu::cudev::ght;
 
         CV_Assert(levels > 0 && r_table.rows == (levels + 1) && r_sizes.cols == (levels + 1));
         CV_Assert(dp > 0.0);
@@ -1288,16 +764,16 @@ namespace
 
         if (edgePointList.cols > 0)
         {
-            GHT_Ballard_PosRotation_calcHist_gpu(edgePointList.ptr<unsigned int>(0), edgePointList.ptr<float>(1), edgePointList.cols,
-                                                 r_table, r_sizes.ptr<int>(),
-                                                 hist, rows, cols,
-                                                 (float)minAngle, (float)angleStep, angleRange, (float)dp, levels);
+            Ballard_PosRotation_calcHist_gpu(edgePointList.ptr<unsigned int>(0), edgePointList.ptr<float>(1), edgePointList.cols,
+                                             r_table, r_sizes.ptr<int>(),
+                                             hist, rows, cols,
+                                             (float)minAngle, (float)angleStep, angleRange, (float)dp, levels);
         }
     }
 
     void GHT_Ballard_PosRotation::findPosInHist()
    {
-        using namespace cv::gpu::cudev::hough;
+        using namespace cv::gpu::cudev::ght;
 
         CV_Assert(votesThreshold > 0);
@@ -1308,7 +784,7 @@ namespace
 
         ensureSizeIsEnough(2, maxSize, CV_32FC4, outBuf);
 
-        posCount = GHT_Ballard_PosRotation_findPosInHist_gpu(hist, rows, cols, angleRange, outBuf.ptr<float4>(0), outBuf.ptr<int3>(1), maxSize, (float)minAngle, (float)angleStep, (float)dp, votesThreshold);
+        posCount = Ballard_PosRotation_findPosInHist_gpu(hist, rows, cols, angleRange, outBuf.ptr<float4>(0), outBuf.ptr<int3>(1), maxSize, (float)minAngle, (float)angleStep, (float)dp, votesThreshold);
     }
 
     /////////////////////////////////////////
@@ -1476,10 +952,10 @@ namespace
     void GHT_Guil_Full::processTempl()
     {
-        using namespace cv::gpu::cudev::hough;
+        using namespace cv::gpu::cudev::ght;
 
         buildFeatureList(templEdges, templDx, templDy, templFeatures,
-                         GHT_Guil_Full_setTemplFeatures, GHT_Guil_Full_buildTemplFeatureList_gpu,
+                         Guil_Full_setTemplFeatures, Guil_Full_buildTemplFeatureList_gpu,
                          true, templCenter);
 
         h_buf.resize(templFeatures.sizes.cols);
@@ -1489,7 +965,7 @@ namespace
     void GHT_Guil_Full::processImage()
     {
-        using namespace cv::gpu::cudev::hough;
+        using namespace cv::gpu::cudev::ght;
 
         CV_Assert(levels > 0);
         CV_Assert(templFeatures.sizes.cols == levels + 1);
@@ -1518,7 +994,7 @@ namespace
 
         ensureSizeIsEnough(2, maxSize, CV_32FC4, outBuf);
 
         buildFeatureList(imageEdges, imageDx, imageDy, imageFeatures,
-                         GHT_Guil_Full_setImageFeatures, GHT_Guil_Full_buildImageFeatureList_gpu,
+                         Guil_Full_setImageFeatures, Guil_Full_buildImageFeatureList_gpu,
                          false);
 
         calcOrientation();
@@ -1601,14 +1077,14 @@ namespace
     void GHT_Guil_Full::calcOrientation()
     {
-        using namespace cv::gpu::cudev::hough;
+        using namespace cv::gpu::cudev::ght;
 
         const double iAngleStep = 1.0 / angleStep;
         const int angleRange = cvCeil((maxAngle - minAngle) * iAngleStep);
 
         hist.setTo(Scalar::all(0));
-        GHT_Guil_Full_calcOHist_gpu(templFeatures.sizes.ptr<int>(), imageFeatures.sizes.ptr<int>(0),
-                                    hist.ptr<int>(), (float)minAngle, (float)maxAngle, (float)angleStep, angleRange, levels, templFeatures.maxSize);
+        Guil_Full_calcOHist_gpu(templFeatures.sizes.ptr<int>(), imageFeatures.sizes.ptr<int>(0), hist.ptr<int>(),
+                                (float)minAngle, (float)maxAngle, (float)angleStep, angleRange, levels, templFeatures.maxSize);
         cudaSafeCall( cudaMemcpy(&h_buf[0], hist.data, h_buf.size() * sizeof(int), cudaMemcpyDeviceToHost) );
 
         angles.clear();
@@ -1625,14 +1101,15 @@ namespace
     void GHT_Guil_Full::calcScale(double angle)
     {
-        using namespace cv::gpu::cudev::hough;
+        using namespace cv::gpu::cudev::ght;
 
         const double iScaleStep = 1.0 / scaleStep;
         const int scaleRange = cvCeil((maxScale - minScale) * iScaleStep);
 
         hist.setTo(Scalar::all(0));
-        GHT_Guil_Full_calcSHist_gpu(templFeatures.sizes.ptr<int>(), imageFeatures.sizes.ptr<int>(0),
-                                    hist.ptr<int>(), (float)angle, (float)angleEpsilon, (float)minScale, (float)maxScale, (float)iScaleStep, scaleRange, levels, templFeatures.maxSize);
+        Guil_Full_calcSHist_gpu(templFeatures.sizes.ptr<int>(), imageFeatures.sizes.ptr<int>(0), hist.ptr<int>(),
+                                (float)angle, (float)angleEpsilon, (float)minScale, (float)maxScale,
+                                (float)iScaleStep, scaleRange, levels, templFeatures.maxSize);
         cudaSafeCall( cudaMemcpy(&h_buf[0], hist.data, h_buf.size() * sizeof(int), cudaMemcpyDeviceToHost) );
 
         scales.clear();
@@ -1649,14 +1126,15 @@ namespace
     void GHT_Guil_Full::calcPosition(double angle, int angleVotes, double scale, int scaleVotes)
     {
-        using namespace cv::gpu::cudev::hough;
+        using namespace cv::gpu::cudev::ght;
 
         hist.setTo(Scalar::all(0));
-        GHT_Guil_Full_calcPHist_gpu(templFeatures.sizes.ptr<int>(), imageFeatures.sizes.ptr<int>(0),
-                                    hist, (float)angle, (float)angleEpsilon, (float)scale, (float)dp, levels, templFeatures.maxSize);
+        Guil_Full_calcPHist_gpu(templFeatures.sizes.ptr<int>(), imageFeatures.sizes.ptr<int>(0), hist,
+                                (float)angle, (float)angleEpsilon, (float)scale, (float)dp, levels, templFeatures.maxSize);
 
-        posCount = GHT_Guil_Full_findPosInHist_gpu(hist, outBuf.ptr<float4>(0), outBuf.ptr<int3>(1),
-                                                   posCount, maxSize, (float)angle, angleVotes, (float)scale, scaleVotes, (float)dp, posThresh);
+        posCount = Guil_Full_findPosInHist_gpu(hist, outBuf.ptr<float4>(0), outBuf.ptr<int3>(1),
+                                               posCount, maxSize, (float)angle, angleVotes,
+                                               (float)scale, scaleVotes, (float)dp, posThresh);
     }
 }
 
@@ -1679,10 +1157,11 @@ Ptr<GeneralizedHough> cv::gpu::GeneralizedHough::create(int method)
     case (cv::GeneralizedHough::GHT_POSITION | cv::GeneralizedHough::GHT_SCALE | cv::GeneralizedHough::GHT_ROTATION):
         CV_Assert( !GHT_Guil_Full_info_auto.name().empty() );
         return new GHT_Guil_Full();
-    }
-
-    CV_Error(Error::StsBadArg, "Unsupported method");
-    return Ptr<GeneralizedHough>();
+
+    default:
+        CV_Error(Error::StsBadArg, "Unsupported method");
+        return Ptr<GeneralizedHough>();
+    }
 }
 
 #endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpuimgproc/src/hough_circles.cpp b/modules/gpuimgproc/src/hough_circles.cpp
new file mode 100644
index 000000000..3f1e77174
--- /dev/null
+++ b/modules/gpuimgproc/src/hough_circles.cpp
@@ -0,0 +1,297 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+
+#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
+
+Ptr<HoughCirclesDetector> cv::gpu::createHoughCirclesDetector(float, float, int, int, int, int, int) { throw_no_cuda(); return Ptr<HoughCirclesDetector>(); }
+
+#else /* !defined (HAVE_CUDA) */
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace hough
+    {
+        int buildPointList_gpu(PtrStepSzb src, unsigned int* list);
+    }
+
+    namespace hough_circles
+    {
+        void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp);
+        int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold);
+        int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count,
+                                   float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20);
+    }
+}}}
+
+namespace
+{
+    class HoughCirclesDetectorImpl : public HoughCirclesDetector
+    {
+    public:
+        HoughCirclesDetectorImpl(float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles);
+
+        void detect(InputArray src, OutputArray circles);
+
+        void setDp(float dp) { dp_ = dp; }
+        float getDp() const { return dp_; }
+
+        void setMinDist(float minDist) { minDist_ = minDist; }
+        float getMinDist() const { return minDist_; }
+
+        void setCannyThreshold(int cannyThreshold) { cannyThreshold_ = cannyThreshold; }
+        int getCannyThreshold() const { return cannyThreshold_; }
+
+        void setVotesThreshold(int votesThreshold) { votesThreshold_ = votesThreshold; }
+        int getVotesThreshold() const { return votesThreshold_; }
+
+        void setMinRadius(int minRadius) { minRadius_ = minRadius; }
+        int getMinRadius() const { return minRadius_; }
+
+        void setMaxRadius(int maxRadius) { maxRadius_ = maxRadius; }
+        int getMaxRadius() const { return maxRadius_; }
+
+        void setMaxCircles(int maxCircles) { maxCircles_ = maxCircles; }
+        int getMaxCircles() const { return maxCircles_; }
+
+        void write(FileStorage& fs) const
+        {
+            fs << "name" << "HoughCirclesDetector_GPU"
+               << "dp" << dp_
+               << "minDist" << minDist_
+               << "cannyThreshold" << cannyThreshold_
+               << "votesThreshold" << votesThreshold_
+               << "minRadius" << minRadius_
+               << "maxRadius" << maxRadius_
+               << "maxCircles" << maxCircles_;
+        }
+
+        void read(const FileNode& fn)
+        {
+            CV_Assert( String(fn["name"]) == "HoughCirclesDetector_GPU" );
+            dp_ = (float)fn["dp"];
+            minDist_ = (float)fn["minDist"];
+            cannyThreshold_ = (int)fn["cannyThreshold"];
+            votesThreshold_ = (int)fn["votesThreshold"];
+            minRadius_ = (int)fn["minRadius"];
+            maxRadius_ = (int)fn["maxRadius"];
+            maxCircles_ = (int)fn["maxCircles"];
+        }
+
+    private:
+        float dp_;
+        float minDist_;
+        int cannyThreshold_;
+        int votesThreshold_;
+        int minRadius_;
+        int maxRadius_;
+        int maxCircles_;
+
+        GpuMat dx_, dy_;
+        GpuMat edges_;
+        GpuMat accum_;
+        GpuMat list_;
+        GpuMat result_;
+        Ptr<Filter> filterDx_;
+        Ptr<Filter> filterDy_;
+        Ptr<CannyEdgeDetector> canny_;
+    };
+
+    HoughCirclesDetectorImpl::HoughCirclesDetectorImpl(float dp, float minDist, int cannyThreshold, int votesThreshold,
+                                                       int minRadius, int maxRadius, int maxCircles) :
+        dp_(dp), minDist_(minDist), cannyThreshold_(cannyThreshold), votesThreshold_(votesThreshold),
+        minRadius_(minRadius), maxRadius_(maxRadius), maxCircles_(maxCircles)
+    {
+        canny_ = gpu::createCannyEdgeDetector(std::max(cannyThreshold_ / 2, 1), cannyThreshold_);
+
+        filterDx_ = gpu::createSobelFilter(CV_8UC1, CV_32S, 1, 0);
+        filterDy_ = gpu::createSobelFilter(CV_8UC1, CV_32S, 0, 1);
+    }
+
+    void HoughCirclesDetectorImpl::detect(InputArray _src, OutputArray circles)
+    {
+        using namespace cv::gpu::cudev::hough;
+        using namespace cv::gpu::cudev::hough_circles;
+
+        GpuMat src = _src.getGpuMat();
+
+        CV_Assert( src.type() == CV_8UC1 );
+        CV_Assert( src.cols < std::numeric_limits<unsigned short>::max() );
+        CV_Assert( src.rows < std::numeric_limits<unsigned short>::max() );
+        CV_Assert( dp_ > 0 );
+        CV_Assert( minRadius_ > 0 && maxRadius_ > minRadius_ );
+        CV_Assert( cannyThreshold_ > 0 );
+        CV_Assert( votesThreshold_ > 0 );
+        CV_Assert( maxCircles_ > 0 );
+
+        const float idp = 1.0f / dp_;
+
+        filterDx_->apply(src, dx_);
+        filterDy_->apply(src, dy_);
+
+        canny_->setLowThreshold(std::max(cannyThreshold_ / 2, 1));
+        canny_->setHighThreshold(cannyThreshold_);
+
+        canny_->detect(dx_, dy_, edges_);
+
+        ensureSizeIsEnough(2, src.size().area(), CV_32SC1, list_);
+        unsigned int* srcPoints = list_.ptr<unsigned int>(0);
+        unsigned int* centers = list_.ptr<unsigned int>(1);
+
+        const int pointsCount = buildPointList_gpu(edges_, srcPoints);
+        if (pointsCount == 0)
+        {
+            circles.release();
+            return;
+        }
+
+        ensureSizeIsEnough(cvCeil(src.rows * idp) + 2, cvCeil(src.cols * idp) + 2, CV_32SC1, accum_);
+        accum_.setTo(Scalar::all(0));
+
+        circlesAccumCenters_gpu(srcPoints, pointsCount, dx_, dy_, accum_, minRadius_, maxRadius_, idp);
+
+        int centersCount = buildCentersList_gpu(accum_, centers, votesThreshold_);
+        if (centersCount == 0)
+        {
+            circles.release();
+            return;
+        }
+
+        if (minDist_ > 1)
+        {
+            AutoBuffer<ushort2> oldBuf_(centersCount);
+            AutoBuffer<ushort2> newBuf_(centersCount);
+            int newCount = 0;
+
+            ushort2* oldBuf = oldBuf_;
+            ushort2* newBuf = newBuf_;
+
+            cudaSafeCall( cudaMemcpy(oldBuf, centers, centersCount * sizeof(ushort2), cudaMemcpyDeviceToHost) );
+
+            const int cellSize = cvRound(minDist_);
+            const int gridWidth = (src.cols + cellSize - 1) / cellSize;
+            const int gridHeight = (src.rows + cellSize - 1) / cellSize;
+
+            std::vector< std::vector<ushort2> > grid(gridWidth * gridHeight);
+
+            const float minDist2 = minDist_ * minDist_;
+
+            for (int i = 0; i < centersCount; ++i)
+            {
+                ushort2 p = oldBuf[i];
+
+                bool good = true;
+
+                int xCell = static_cast<int>(p.x / cellSize);
+                int yCell = static_cast<int>(p.y / cellSize);
+
+                int x1 = xCell - 1;
+                int y1 = yCell - 1;
+                int x2 = xCell + 1;
+                int y2 = yCell + 1;
+
+                // boundary check
+                x1 = std::max(0, x1);
+                y1 = std::max(0, y1);
+                x2 = std::min(gridWidth - 1, x2);
+                y2 = std::min(gridHeight - 1, y2);
+
+                for (int yy = y1; yy <= y2; ++yy)
+                {
+                    for (int xx = x1; xx <= x2; ++xx)
+                    {
+                        std::vector<ushort2>& m = grid[yy * gridWidth + xx];
+
+                        for(size_t j = 0; j < m.size(); ++j)
+                        {
+                            float dx = (float)(p.x - m[j].x);
+                            float dy = (float)(p.y - m[j].y);
+
+                            if (dx * dx + dy * dy < minDist2)
+                            {
+                                good = false;
+                                goto break_out;
+                            }
+                        }
+                    }
+                }
+
+                break_out:
+
+                if(good)
+                {
+                    grid[yCell * gridWidth + xCell].push_back(p);
+
+                    newBuf[newCount++] = p;
+                }
+            }
+
+            cudaSafeCall( cudaMemcpy(centers, newBuf, newCount * sizeof(unsigned int), cudaMemcpyHostToDevice) );
+            centersCount = newCount;
+        }
+
+        ensureSizeIsEnough(1, maxCircles_, CV_32FC3, result_);
+
+        int circlesCount = circlesAccumRadius_gpu(centers, centersCount, srcPoints, pointsCount, result_.ptr<float3>(), maxCircles_,
+                                                  dp_, minRadius_, maxRadius_, votesThreshold_, deviceSupports(FEATURE_SET_COMPUTE_20));
+
+        if (circlesCount == 0)
+        {
+            circles.release();
+            return;
+        }
+
+        result_.cols = circlesCount;
+        result_.copyTo(circles);
+    }
+}
+
+Ptr<HoughCirclesDetector> cv::gpu::createHoughCirclesDetector(float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles)
+{
+    return new HoughCirclesDetectorImpl(dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius, maxCircles);
+}
+
+#endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpuimgproc/src/hough_lines.cpp b/modules/gpuimgproc/src/hough_lines.cpp
new file mode 100644
index 000000000..e0dec305d
--- /dev/null
+++ b/modules/gpuimgproc/src/hough_lines.cpp
@@ -0,0 +1,202 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+
+#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
+
+Ptr<HoughLinesDetector> cv::gpu::createHoughLinesDetector(float, float, int, bool, int) { throw_no_cuda(); return Ptr<HoughLinesDetector>(); }
+
+#else /* !defined (HAVE_CUDA) */
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace hough
+    {
+        int buildPointList_gpu(PtrStepSzb src, unsigned int* list);
+    }
+
+    namespace hough_lines
+    {
+        void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20);
+        int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort);
+    }
+}}}
+
+namespace
+{
+    class HoughLinesDetectorImpl : public HoughLinesDetector
+    {
+    public:
+        HoughLinesDetectorImpl(float rho, float theta, int threshold, bool doSort, int maxLines) :
+            rho_(rho), theta_(theta), threshold_(threshold), doSort_(doSort), maxLines_(maxLines)
+        {
+        }
+
+        void detect(InputArray src, OutputArray lines);
+        void downloadResults(InputArray d_lines, OutputArray h_lines, OutputArray h_votes = noArray());
+
+        void setRho(float rho) { rho_ = rho; }
+        float getRho() const { return rho_; }
+
+        void setTheta(float theta) { theta_ = theta; }
+        float getTheta() const { return theta_; }
+
+        void setThreshold(int threshold) { threshold_ = threshold; }
+        int getThreshold() const { return threshold_; }
+
+        void setDoSort(bool doSort) { doSort_ = doSort; }
+        bool getDoSort() const { return doSort_; }
+
+        void setMaxLines(int maxLines) { maxLines_ = maxLines; }
+        int getMaxLines() const { return maxLines_; }
+
+        void write(FileStorage& fs) const
+        {
+            fs << "name" << "HoughLinesDetector_GPU"
+               << "rho" << rho_
+               << "theta" << theta_
+               << "threshold" << threshold_
+               << "doSort" << doSort_
+               << "maxLines" << maxLines_;
+        }
+
+        void read(const FileNode& fn)
+        {
+            CV_Assert( String(fn["name"]) == "HoughLinesDetector_GPU" );
+            rho_ = (float)fn["rho"];
+            theta_ = (float)fn["theta"];
+            threshold_ = (int)fn["threshold"];
+            doSort_ = (int)fn["doSort"] != 0;
+            maxLines_ = (int)fn["maxLines"];
+        }
+
+    private:
+        float rho_;
+        float theta_;
+        int threshold_;
+        bool doSort_;
+        int maxLines_;
+
+        GpuMat accum_;
+        GpuMat list_;
+        GpuMat result_;
+    };
+
+    void HoughLinesDetectorImpl::detect(InputArray _src, OutputArray lines)
+    {
+        using namespace cv::gpu::cudev::hough;
+        using namespace cv::gpu::cudev::hough_lines;
+
+        GpuMat src = _src.getGpuMat();
+
+        CV_Assert( src.type() == CV_8UC1 );
+        CV_Assert( src.cols < std::numeric_limits<unsigned short>::max() );
+        CV_Assert( src.rows < std::numeric_limits<unsigned short>::max() );
+
+        ensureSizeIsEnough(1, src.size().area(), CV_32SC1, list_);
+        unsigned int* srcPoints = list_.ptr<unsigned int>();
+
+        const int pointsCount = buildPointList_gpu(src, srcPoints);
+        if (pointsCount == 0)
+        {
+            lines.release();
+            return;
+        }
+
+        const int numangle = cvRound(CV_PI / theta_);
+        const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho_);
+        CV_Assert( numangle > 0 && numrho > 0 );
+
+        ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, accum_);
+        accum_.setTo(Scalar::all(0));
+
+        DeviceInfo devInfo;
+        linesAccum_gpu(srcPoints, pointsCount, accum_, rho_, theta_, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20));
+
+        ensureSizeIsEnough(2, maxLines_, CV_32FC2, result_);
+
+        int linesCount = linesGetResult_gpu(accum_, result_.ptr<float2>(0), result_.ptr<int>(1), maxLines_, rho_, theta_, threshold_, doSort_);
+
+        if (linesCount == 0)
+        {
+            lines.release();
+            return;
+        }
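To make the accumulator sizing above concrete, a worked example under assumed defaults (rho = 1, theta = CV_PI / 180, a 640x480 edge map; these numbers are illustrative, not taken from the patch):

    const int numangle = cvRound(CV_PI / theta);                // = 180 angle bins
    const int numrho   = cvRound(((640 + 480) * 2 + 1) / rho);  // = 2241 distance bins
    // accum_ is then (numangle + 2) x (numrho + 2) = 182 x 2243, CV_32SC1;
    // the one-cell border lets the kernels read neighbouring bins without bounds checks.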
+
+        result_.cols = linesCount;
+        result_.copyTo(lines);
+    }
+
+    void HoughLinesDetectorImpl::downloadResults(InputArray _d_lines, OutputArray h_lines, OutputArray h_votes)
+    {
+        GpuMat d_lines = _d_lines.getGpuMat();
+
+        if (d_lines.empty())
+        {
+            h_lines.release();
+            if (h_votes.needed())
+                h_votes.release();
+            return;
+        }
+
+        CV_Assert( d_lines.rows == 2 && d_lines.type() == CV_32FC2 );
+
+        d_lines.row(0).download(h_lines);
+
+        if (h_votes.needed())
+        {
+            GpuMat d_votes(1, d_lines.cols, CV_32SC1, d_lines.ptr<int>(1));
+            d_votes.download(h_votes);
+        }
+    }
+}
+
+Ptr<HoughLinesDetector> cv::gpu::createHoughLinesDetector(float rho, float theta, int threshold, bool doSort, int maxLines)
+{
+    return new HoughLinesDetectorImpl(rho, theta, threshold, doSort, maxLines);
+}
+
+#endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpuimgproc/src/hough_segments.cpp b/modules/gpuimgproc/src/hough_segments.cpp
new file mode 100644
index 000000000..1f11be68b
--- /dev/null
+++ b/modules/gpuimgproc/src/hough_segments.cpp
@@ -0,0 +1,183 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+
+#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
+
+Ptr<HoughSegmentDetector> cv::gpu::createHoughSegmentDetector(float, float, int, int, int) { throw_no_cuda(); return Ptr<HoughSegmentDetector>(); }
+
+#else /* !defined (HAVE_CUDA) */
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace hough
+    {
+        int buildPointList_gpu(PtrStepSzb src, unsigned int* list);
+    }
+
+    namespace hough_lines
+    {
+        void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20);
+    }
+
+    namespace hough_segments
+    {
+        int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength);
+    }
+}}}
+
+namespace
+{
+    class HoughSegmentDetectorImpl : public HoughSegmentDetector
+    {
+    public:
+        HoughSegmentDetectorImpl(float rho, float theta, int minLineLength, int maxLineGap, int maxLines) :
+            rho_(rho), theta_(theta), minLineLength_(minLineLength), maxLineGap_(maxLineGap), maxLines_(maxLines)
+        {
+        }
+
+        void detect(InputArray src, OutputArray lines);
+
+        void setRho(float rho) { rho_ = rho; }
+        float getRho() const { return rho_; }
+
+        void setTheta(float theta) { theta_ = theta; }
+        float getTheta() const { return theta_; }
+
+        void setMinLineLength(int minLineLength) { minLineLength_ = minLineLength; }
+        int getMinLineLength() const { return minLineLength_; }
+
+        void setMaxLineGap(int maxLineGap) { maxLineGap_ = maxLineGap; }
+        int getMaxLineGap() const { return maxLineGap_; }
+
+        void setMaxLines(int maxLines) { maxLines_ = maxLines; }
+        int getMaxLines() const { return maxLines_; }
+
+        void write(FileStorage& fs) const
+        {
+            fs << "name" << "PHoughLinesDetector_GPU"
+               << "rho" << rho_
+               << "theta" << theta_
+               << "minLineLength" << minLineLength_
+               << "maxLineGap" << maxLineGap_
+               << "maxLines" << maxLines_;
+        }
+
+        void read(const FileNode& fn)
+        {
+            CV_Assert( String(fn["name"]) == "PHoughLinesDetector_GPU" );
+            rho_ = (float)fn["rho"];
+            theta_ = (float)fn["theta"];
+            minLineLength_ = (int)fn["minLineLength"];
+            maxLineGap_ = (int)fn["maxLineGap"];
+            maxLines_ = (int)fn["maxLines"];
+        }
+
+    private:
+        float rho_;
+        float theta_;
+        int minLineLength_;
+        int maxLineGap_;
+        int maxLines_;
+
+        GpuMat accum_;
+        GpuMat list_;
+        GpuMat result_;
+    };
+
+    void HoughSegmentDetectorImpl::detect(InputArray _src, OutputArray lines)
+    {
+        using namespace cv::gpu::cudev::hough;
+        using namespace cv::gpu::cudev::hough_lines;
+        using namespace cv::gpu::cudev::hough_segments;
+
+        GpuMat src = _src.getGpuMat();
+
+        CV_Assert( src.type() == CV_8UC1 );
+        CV_Assert( src.cols < std::numeric_limits<unsigned short>::max() );
+        CV_Assert( src.rows < std::numeric_limits<unsigned short>::max() );
+
+        ensureSizeIsEnough(1, src.size().area(), CV_32SC1, list_);
+        unsigned int* srcPoints = list_.ptr<unsigned int>();
+
+        const int pointsCount = buildPointList_gpu(src, srcPoints);
+        if (pointsCount == 0)
+        {
+            lines.release();
+            return;
+        }
+
+        const int numangle = cvRound(CV_PI / theta_);
+        const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho_);
+        CV_Assert( numangle > 0 && numrho > 0 );
+
+        ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, accum_);
+        accum_.setTo(Scalar::all(0));
+
+        DeviceInfo devInfo;
+        linesAccum_gpu(srcPoints, pointsCount, accum_, rho_, theta_, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20));
+
+        ensureSizeIsEnough(1, maxLines_, CV_32SC4, result_);
+
+        int linesCount = houghLinesProbabilistic_gpu(src, accum_, result_.ptr<int4>(), maxLines_, rho_, theta_, maxLineGap_, minLineLength_);
+
+        if (linesCount == 0)
+        {
+            lines.release();
+            return;
+        }
+
+        result_.cols = linesCount;
+        result_.copyTo(lines);
+    }
+}
+
+Ptr<HoughSegmentDetector> cv::gpu::createHoughSegmentDetector(float rho, float theta, int minLineLength, int maxLineGap, int maxLines)
+{
+    return new HoughSegmentDetectorImpl(rho, theta, minLineLength, maxLineGap, maxLines);
+}
+
+#endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpuimgproc/src/precomp.hpp b/modules/gpuimgproc/src/precomp.hpp
index 1417c874b..27068d4cc 100644
--- a/modules/gpuimgproc/src/precomp.hpp
+++ b/modules/gpuimgproc/src/precomp.hpp
@@ -46,6 +46,7 @@
 #include "opencv2/gpuimgproc.hpp"
 #include "opencv2/gpufilters.hpp"
 
+#include "opencv2/core/utility.hpp"
 #include "opencv2/core/private.hpp"
 #include "opencv2/core/private.gpu.hpp"
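Finally, a self-contained sketch of the host-side API these new files expose, as one might call it from client code; the input file name and parameter values are hypothetical, and the snippet assumes a CUDA-capable device with this branch's headers:

    #include "opencv2/core.hpp"
    #include "opencv2/highgui.hpp"
    #include "opencv2/gpuimgproc.hpp"

    int main()
    {
        // "edges.png" is a placeholder for an 8-bit binary edge map.
        cv::Mat edges = cv::imread("edges.png", cv::IMREAD_GRAYSCALE);
        cv::gpu::GpuMat d_edges(edges);

        cv::Ptr<cv::gpu::HoughLinesDetector> hough =
            cv::gpu::createHoughLinesDetector(1.0f, (float)(CV_PI / 180.0),
                                              100 /*threshold*/, true /*doSort*/, 4096 /*maxLines*/);

        cv::gpu::GpuMat d_lines;
        hough->detect(d_edges, d_lines);

        // downloadResults() splits the 2 x N device buffer into the (rho, theta)
        // pairs (row 0) and, optionally, the per-line vote counts (row 1).
        cv::Mat h_lines, h_votes;
        hough->downloadResults(d_lines, h_lines, h_votes);
        return 0;
    }

Because the detector objects cache their accumulator and point-list buffers (accum_, list_, result_), reusing one detector across frames avoids repeated device allocations, which is presumably why the implementations above keep them as members rather than locals.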