used new device layer for cv::gpu::integral

This commit is contained in:
Vladislav Vinogradov
2013-08-27 13:32:05 +04:00
parent 224f18b06c
commit 7839dbd2c4
6 changed files with 171 additions and 618 deletions

View File

@@ -294,116 +294,4 @@ void cv::cuda::normalize(InputArray _src, OutputArray dst, double a, double b, i
}
}
////////////////////////////////////////////////////////////////////////
// integral
namespace cv { namespace cuda { namespace device
{
namespace imgproc
{
void shfl_integral_gpu(const PtrStepSzb& img, PtrStepSz<unsigned int> integral, cudaStream_t stream);
}
}}}
void cv::cuda::integral(InputArray _src, OutputArray _dst, GpuMat& buffer, Stream& _stream)
{
GpuMat src = _src.getGpuMat();
CV_Assert( src.type() == CV_8UC1 );
cudaStream_t stream = StreamAccessor::getStream(_stream);
cv::Size whole;
cv::Point offset;
src.locateROI(whole, offset);
if (deviceSupports(WARP_SHUFFLE_FUNCTIONS) && src.cols <= 2048
&& offset.x % 16 == 0 && ((src.cols + 63) / 64) * 64 <= (static_cast<int>(src.step) - offset.x))
{
ensureSizeIsEnough(((src.rows + 7) / 8) * 8, ((src.cols + 63) / 64) * 64, CV_32SC1, buffer);
cv::cuda::device::imgproc::shfl_integral_gpu(src, buffer, stream);
_dst.create(src.rows + 1, src.cols + 1, CV_32SC1);
GpuMat dst = _dst.getGpuMat();
dst.setTo(Scalar::all(0), _stream);
GpuMat inner = dst(Rect(1, 1, src.cols, src.rows));
GpuMat res = buffer(Rect(0, 0, src.cols, src.rows));
res.copyTo(inner, _stream);
}
else
{
#ifndef HAVE_OPENCV_CUDALEGACY
throw_no_cuda();
#else
_dst.create(src.rows + 1, src.cols + 1, CV_32SC1);
GpuMat dst = _dst.getGpuMat();
NcvSize32u roiSize;
roiSize.width = src.cols;
roiSize.height = src.rows;
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, cv::cuda::getDevice()) );
Ncv32u bufSize;
ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer);
NppStStreamHandler h(stream);
ncvSafeCall( nppiStIntegral_8u32u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>()), static_cast<int>(src.step),
dst.ptr<Ncv32u>(), static_cast<int>(dst.step), roiSize, buffer.ptr<Ncv8u>(), bufSize, prop) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
#endif
}
}
//////////////////////////////////////////////////////////////////////////////
// sqrIntegral
void cv::cuda::sqrIntegral(InputArray _src, OutputArray _dst, GpuMat& buf, Stream& _stream)
{
#ifndef HAVE_OPENCV_CUDALEGACY
(void) _src;
(void) _dst;
(void) _stream;
throw_no_cuda();
#else
GpuMat src = _src.getGpuMat();
CV_Assert( src.type() == CV_8U );
NcvSize32u roiSize;
roiSize.width = src.cols;
roiSize.height = src.rows;
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, cv::cuda::getDevice()) );
Ncv32u bufSize;
ncvSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize, prop));
ensureSizeIsEnough(1, bufSize, CV_8U, buf);
cudaStream_t stream = StreamAccessor::getStream(_stream);
NppStStreamHandler h(stream);
_dst.create(src.rows + 1, src.cols + 1, CV_64F);
GpuMat dst = _dst.getGpuMat();
ncvSafeCall(nppiStSqrIntegral_8u64u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>(0)), static_cast<int>(src.step),
dst.ptr<Ncv64u>(0), static_cast<int>(dst.step), roiSize, buf.ptr<Ncv8u>(0), bufSize, prop));
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
#endif
}
#endif