diff --git a/modules/gpu/doc/image_filtering.rst b/modules/gpu/doc/image_filtering.rst
index 7b21fc353..2e674cf4f 100644
--- a/modules/gpu/doc/image_filtering.rst
+++ b/modules/gpu/doc/image_filtering.rst
@@ -395,7 +395,7 @@ Applies the non-separable 2D linear filter to an image.
 
 .. ocv:function:: void gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor=Point(-1,-1), Stream& stream = Stream::Null())
 
-    :param src: Source image. ``CV_8UC1`` and ``CV_8UC4`` source types are supported.
+    :param src: Source image. ``CV_8UC1`` , ``CV_8UC4`` and ``CV_32FC1`` source types are supported.
 
     :param dst: Destination image. The size and the number of channels is the same as ``src`` .
 
diff --git a/modules/gpu/perf/perf_filters.cpp b/modules/gpu/perf/perf_filters.cpp
index f6ba4a9d3..932255728 100644
--- a/modules/gpu/perf/perf_filters.cpp
+++ b/modules/gpu/perf/perf_filters.cpp
@@ -102,8 +102,8 @@ GPU_PERF_TEST(LinearFilter, cv::gpu::DeviceInfo, cv::Size, perf::MatType, int)
 INSTANTIATE_TEST_CASE_P(Filter, LinearFilter, testing::Combine(
     ALL_DEVICES,
     GPU_TYPICAL_MAT_SIZES,
-    testing::Values(CV_8UC1, CV_8UC4),
-    testing::Values(3, 5)));
+    testing::Values(CV_8UC1, CV_8UC4, CV_32FC1),
+    testing::Values(3, 5, 7, 9)));
 
 //////////////////////////////////////////////////////////////////////
 // SeparableLinearFilter
diff --git a/modules/gpu/perf/perf_imgproc.cpp b/modules/gpu/perf/perf_imgproc.cpp
index 1418d09ba..8c31d83fb 100644
--- a/modules/gpu/perf/perf_imgproc.cpp
+++ b/modules/gpu/perf/perf_imgproc.cpp
@@ -727,11 +727,12 @@ INSTANTIATE_TEST_CASE_P(ImgProc, Dft, testing::Combine(
 //////////////////////////////////////////////////////////////////////
 // Convolve
 
-GPU_PERF_TEST(Convolve, cv::gpu::DeviceInfo, cv::Size, int)
+GPU_PERF_TEST(Convolve, cv::gpu::DeviceInfo, cv::Size, int, bool)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
     cv::Size size = GET_PARAM(1);
     int templ_size = GET_PARAM(2);
+    bool ccorr = GET_PARAM(3);
 
     cv::gpu::setDevice(devInfo.deviceID());
 
@@ -748,14 +749,15 @@ GPU_PERF_TEST(Convolve, cv::gpu::DeviceInfo, cv::Size, int)
 
     TEST_CYCLE()
     {
-        cv::gpu::convolve(image, templ, dst, false, buf);
+        cv::gpu::convolve(image, templ, dst, ccorr, buf);
     }
 }
 
 INSTANTIATE_TEST_CASE_P(ImgProc, Convolve, testing::Combine(
     ALL_DEVICES,
     GPU_TYPICAL_MAT_SIZES,
-    testing::Values(3, 9, 27, 32, 64)));
+    testing::Values(3, 9, 27, 32, 64),
+    testing::Bool()));
 
 //////////////////////////////////////////////////////////////////////
 // PyrDown
diff --git a/modules/gpu/src/cuda/imgproc.cu b/modules/gpu/src/cuda/imgproc.cu
index 72053ad3b..1a302f455 100644
--- a/modules/gpu/src/cuda/imgproc.cu
+++ b/modules/gpu/src/cuda/imgproc.cu
@@ -904,79 +904,49 @@ namespace cv { namespace gpu { namespace device
             cudaSafeCall(cudaDeviceSynchronize());
         }
 
-        //////////////////////////////////////////////////////////////////////////
-        // convolve
+        // filter2D
 
-        #define CONVOLVE_MAX_KERNEL_SIZE 17
+        #define FILTER2D_MAX_KERNEL_SIZE 16
 
-        __constant__ float c_convolveKernel[CONVOLVE_MAX_KERNEL_SIZE * CONVOLVE_MAX_KERNEL_SIZE];
+        __constant__ float c_filter2DKernel[FILTER2D_MAX_KERNEL_SIZE * FILTER2D_MAX_KERNEL_SIZE];
 
-        __global__ void convolve(const DevMem2Df src, PtrStepf dst, int kWidth, int kHeight)
+        texture<float, cudaTextureType2D, cudaReadModeElementType> filter2DTex(0, cudaFilterModePoint, cudaAddressModeBorder);
+
+        __global__ void filter2D(int ofsX, int ofsY, DevMem2Df dst, const int kWidth, const int kHeight, const int anchorX, const int anchorY)
         {
-            __shared__ float smem[16 + 2 * 8][16 + 2 * 8];
-
             const int x = blockIdx.x * blockDim.x + threadIdx.x;
             const int y = blockIdx.y * blockDim.y + threadIdx.y;
 
-            // x | x 0 | 0
-            // -----------
-            // x | x 0 | 0
-            // 0 | 0 0 | 0
-            // -----------
-            // 0 | 0 0 | 0
-            smem[threadIdx.y][threadIdx.x] = src.ptr(::min(::max(y - 8, 0), src.rows - 1))[::min(::max(x - 8, 0), src.cols - 1)];
+            if (x >= dst.cols || y >= dst.rows)
+                return;
 
-            // 0 | 0 x | x
-            // -----------
-            // 0 | 0 x | x
-            // 0 | 0 0 | 0
-            // -----------
-            // 0 | 0 0 | 0
-            smem[threadIdx.y][threadIdx.x + 16] = src.ptr(::min(::max(y - 8, 0), src.rows - 1))[::min(x + 8, src.cols - 1)];
+            float res = 0;
 
-            // 0 | 0 0 | 0
-            // -----------
-            // 0 | 0 0 | 0
-            // x | x 0 | 0
-            // -----------
-            // x | x 0 | 0
-            smem[threadIdx.y + 16][threadIdx.x] = src.ptr(::min(y + 8, src.rows - 1))[::min(::max(x - 8, 0), src.cols - 1)];
+            const int baseX = ofsX + x - anchorX;
+            const int baseY = ofsY + y - anchorY;
 
-            // 0 | 0 0 | 0
-            // -----------
-            // 0 | 0 0 | 0
-            // 0 | 0 x | x
-            // -----------
-            // 0 | 0 x | x
-            smem[threadIdx.y + 16][threadIdx.x + 16] = src.ptr(::min(y + 8, src.rows - 1))[::min(x + 8, src.cols - 1)];
+            int kInd = 0;
 
-            __syncthreads();
-
-            if (x < src.cols && y < src.rows)
+            for (int i = 0; i < kHeight; ++i)
             {
-                float res = 0;
-
-                for (int i = 0; i < kHeight; ++i)
-                {
-                    for (int j = 0; j < kWidth; ++j)
-                    {
-                        res += smem[threadIdx.y + 8 - kHeight / 2 + i][threadIdx.x + 8 - kWidth / 2 + j] * c_convolveKernel[i * kWidth + j];
-                    }
-                }
-
-                dst.ptr(y)[x] = res;
+                for (int j = 0; j < kWidth; ++j)
+                    res += tex2D(filter2DTex, baseX + j, baseY + i) * c_filter2DKernel[kInd++];
             }
+
+            dst.ptr(y)[x] = res;
         }
 
-        void convolve_gpu(const DevMem2Df& src, const PtrStepf& dst, int kWidth, int kHeight, float* kernel, cudaStream_t stream)
+        void filter2D_gpu(DevMem2Df src, int ofsX, int ofsY, DevMem2Df dst, int kWidth, int kHeight, int anchorX, int anchorY, float* kernel, cudaStream_t stream)
        {
-            cudaSafeCall(cudaMemcpyToSymbol(c_convolveKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
+            cudaSafeCall(cudaMemcpyToSymbol(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
 
             const dim3 block(16, 16);
-            const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
+            const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
 
-            convolve<<<grid, block, 0, stream>>>(src, dst, kWidth, kHeight);
+            bindTexture(&filter2DTex, src);
+
+            filter2D<<<grid, block, 0, stream>>>(ofsX, ofsY, dst, kWidth, kHeight, anchorX, anchorY);
             cudaSafeCall(cudaGetLastError());
 
             if (stream == 0)
diff --git a/modules/gpu/src/filtering.cpp b/modules/gpu/src/filtering.cpp
index e9977c6d3..7af32c1af 100644
--- a/modules/gpu/src/filtering.cpp
+++ b/modules/gpu/src/filtering.cpp
@@ -659,6 +659,14 @@ void cv::gpu::morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& ke
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Linear Filter
 
+namespace cv { namespace gpu { namespace device
+{
+    namespace imgproc
+    {
+        void filter2D_gpu(DevMem2Df src, int ofsX, int ofsY, DevMem2Df dst, int kWidth, int kHeight, int anchorX, int anchorY, float* kernel, cudaStream_t stream);
+    }
+}}}
+
 namespace
 {
     typedef NppStatus (*nppFilter2D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oSizeROI,
@@ -696,20 +704,56 @@ namespace
         Npp32s nDivisor;
         nppFilter2D_t func;
     };
+
+    struct GpuLinearFilter : public BaseFilter_GPU
+    {
+        GpuLinearFilter(Size ksize_, Point anchor_, const GpuMat& kernel_) :
+            BaseFilter_GPU(ksize_, anchor_), kernel(kernel_) {}
+
+        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
+        {
+            using namespace cv::gpu::device::imgproc;
+
+            Point ofs;
+            Size wholeSize;
+            src.locateROI(wholeSize, ofs);
+            GpuMat srcWhole(wholeSize, src.type(), src.datastart);
+
+            filter2D_gpu(srcWhole, ofs.x, ofs.y, dst, ksize.width, ksize.height, anchor.x, anchor.y, kernel.ptr<float>(), StreamAccessor::getStream(stream));
+        }
+
+        GpuMat kernel;
+    };
 }
 
 Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, const Size& ksize, Point anchor)
 {
-    static const nppFilter2D_t cppFilter2D_callers[] = {0, nppiFilter_8u_C1R, 0, 0, nppiFilter_8u_C4R};
+    CV_Assert(srcType == CV_8UC1 || srcType == CV_8UC4 || srcType == CV_32FC1);
+    CV_Assert(dstType == srcType);
 
-    CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType);
+    if (srcType == CV_32FC1)
+    {
+        CV_Assert(ksize.width * ksize.height <= 16 * 16);
+
+        GpuMat gpu_krnl;
+        normalizeKernel(kernel, gpu_krnl, CV_32F);
+
+        normalizeAnchor(anchor, ksize);
+
+        return Ptr<BaseFilter_GPU>(new GpuLinearFilter(ksize, anchor, gpu_krnl));
+    }
+    else
+    {
+        static const nppFilter2D_t cppFilter2D_callers[] = {0, nppiFilter_8u_C1R, 0, 0, nppiFilter_8u_C4R};
 
-    GpuMat gpu_krnl;
-    int nDivisor;
-    normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, true);
-    normalizeAnchor(anchor, ksize);
+        GpuMat gpu_krnl;
+        int nDivisor;
+        normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, true);
 
-    return Ptr<BaseFilter_GPU>(new NPPLinearFilter(ksize, anchor, gpu_krnl, nDivisor, cppFilter2D_callers[CV_MAT_CN(srcType)]));
+        normalizeAnchor(anchor, ksize);
+
+        return Ptr<BaseFilter_GPU>(new NPPLinearFilter(ksize, anchor, gpu_krnl, nDivisor, cppFilter2D_callers[CV_MAT_CN(srcType)]));
+    }
 }
 
 Ptr<FilterEngine_GPU> cv::gpu::createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, const Point& anchor)
@@ -729,7 +773,8 @@ void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& ke
     dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));
 
     Ptr<FilterEngine_GPU> f = createLinearFilter_GPU(src.type(), dst.type(), kernel, anchor);
-    f->apply(src, dst, Rect(0, 0, -1, -1), stream);
+
+    f->apply(src, dst, src.type() == CV_32FC1 ? Rect(0, 0, src.cols, src.rows) : Rect(0, 0, -1, -1), stream);
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/modules/gpu/src/imgproc.cpp b/modules/gpu/src/imgproc.cpp
index 205c5b2ed..7cb631755 100644
--- a/modules/gpu/src/imgproc.cpp
+++ b/modules/gpu/src/imgproc.cpp
@@ -1673,137 +1673,82 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
     convolve(image, templ, result, ccorr, buf);
 }
 
-namespace cv { namespace gpu { namespace device
-{
-    namespace imgproc
-    {
-        void convolve_gpu(const DevMem2Df& src, const PtrStepf& dst, int kWidth, int kHeight, float* kernel, cudaStream_t stream);
-    }
-}}}
-
 void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr, ConvolveBuf& buf, Stream& stream)
 {
     using namespace ::cv::gpu::device::imgproc;
 
 #ifndef HAVE_CUFFT
-
-    CV_Assert(image.type() == CV_32F);
-    CV_Assert(templ.type() == CV_32F);
-    CV_Assert(templ.cols <= 17 && templ.rows <= 17);
-
-    result.create(image.size(), CV_32F);
-
-    GpuMat& contKernel = buf.templ_block;
-
-    if (templ.isContinuous())
-        contKernel = templ;
-    else
-    {
-        contKernel = createContinuous(templ.size(), templ.type());
-
-        if (stream)
-            stream.enqueueCopy(templ, contKernel);
-        else
-            templ.copyTo(contKernel);
-    }
-
-    convolve_gpu(image, result, templ.cols, templ.rows, contKernel.ptr<float>(), StreamAccessor::getStream(stream));
-
+    throw_nogpu();
 #else
-
     StaticAssert<sizeof(float) == sizeof(cufftReal)>::check();
     StaticAssert<sizeof(float) * 2 == sizeof(cufftComplex)>::check();
 
     CV_Assert(image.type() == CV_32F);
     CV_Assert(templ.type() == CV_32F);
 
-    if (templ.cols < 13 && templ.rows < 13)
+    buf.create(image.size(), templ.size());
+    result.create(buf.result_size, CV_32F);
+
+    Size& block_size = buf.block_size;
+    Size& dft_size = buf.dft_size;
+
+    GpuMat& image_block = buf.image_block;
+    GpuMat& templ_block = buf.templ_block;
+    GpuMat& result_data = buf.result_data;
+
+    GpuMat& image_spect = buf.image_spect;
+    GpuMat& templ_spect = buf.templ_spect;
+    GpuMat& result_spect = buf.result_spect;
+
+    cufftHandle planR2C, planC2R;
+    cufftSafeCall(cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R));
+    cufftSafeCall(cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C));
+
+    cufftSafeCall( cufftSetStream(planR2C, StreamAccessor::getStream(stream)) );
+    cufftSafeCall( cufftSetStream(planC2R, StreamAccessor::getStream(stream)) );
+
+    GpuMat templ_roi(templ.size(), CV_32F, templ.data, templ.step);
+    copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0,
+                   templ_block.cols - templ_roi.cols, 0, Scalar(), stream);
+
+    cufftSafeCall(cufftExecR2C(planR2C, templ_block.ptr<cufftReal>(),
+                               templ_spect.ptr<cufftComplex>()));
+
+    // Process all blocks of the result matrix
+    for (int y = 0; y < result.rows; y += block_size.height)
     {
-        result.create(image.size(), CV_32F);
-
-        GpuMat& contKernel = buf.templ_block;
-
-        if (templ.isContinuous())
-            contKernel = templ;
-        else
+        for (int x = 0; x < result.cols; x += block_size.width)
         {
-            contKernel = createContinuous(templ.size(), templ.type());
+            Size image_roi_size(std::min(x + dft_size.width, image.cols) - x,
+                                std::min(y + dft_size.height, image.rows) - y);
+            GpuMat image_roi(image_roi_size, CV_32F, (void*)(image.ptr<float>(y) + x),
+                             image.step);
+            copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows,
+                           0, image_block.cols - image_roi.cols, 0, Scalar(), stream);
+
+            cufftSafeCall(cufftExecR2C(planR2C, image_block.ptr<cufftReal>(),
+                                       image_spect.ptr<cufftComplex>()));
+            mulAndScaleSpectrums(image_spect, templ_spect, result_spect, 0,
+                                 1.f / dft_size.area(), ccorr, stream);
+            cufftSafeCall(cufftExecC2R(planC2R, result_spect.ptr<cufftComplex>(),
+                                       result_data.ptr<cufftReal>()));
+
+            Size result_roi_size(std::min(x + block_size.width, result.cols) - x,
+                                 std::min(y + block_size.height, result.rows) - y);
+            GpuMat result_roi(result_roi_size, result.type(),
+                              (void*)(result.ptr<float>(y) + x), result.step);
+            GpuMat result_block(result_roi_size, result_data.type(),
+                                result_data.ptr(), result_data.step);
 
             if (stream)
-                stream.enqueueCopy(templ, contKernel);
+                stream.enqueueCopy(result_block, result_roi);
             else
-                templ.copyTo(contKernel);
+                result_block.copyTo(result_roi);
         }
-
-        convolve_gpu(image, result, templ.cols, templ.rows, contKernel.ptr<float>(), StreamAccessor::getStream(stream));
-    }
-    else
-    {
-        buf.create(image.size(), templ.size());
-        result.create(buf.result_size, CV_32F);
-
-        Size& block_size = buf.block_size;
-        Size& dft_size = buf.dft_size;
-
-        GpuMat& image_block = buf.image_block;
-        GpuMat& templ_block = buf.templ_block;
-        GpuMat& result_data = buf.result_data;
-
-        GpuMat& image_spect = buf.image_spect;
-        GpuMat& templ_spect = buf.templ_spect;
-        GpuMat& result_spect = buf.result_spect;
-
-        cufftHandle planR2C, planC2R;
-        cufftSafeCall(cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R));
-        cufftSafeCall(cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C));
-
-        cufftSafeCall( cufftSetStream(planR2C, StreamAccessor::getStream(stream)) );
-        cufftSafeCall( cufftSetStream(planC2R, StreamAccessor::getStream(stream)) );
-
-        GpuMat templ_roi(templ.size(), CV_32F, templ.data, templ.step);
-        copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0,
-                       templ_block.cols - templ_roi.cols, 0, Scalar(), stream);
-
-        cufftSafeCall(cufftExecR2C(planR2C, templ_block.ptr<cufftReal>(),
-                                   templ_spect.ptr<cufftComplex>()));
-
-        // Process all blocks of the result matrix
-        for (int y = 0; y < result.rows; y += block_size.height)
-        {
-            for (int x = 0; x < result.cols; x += block_size.width)
-            {
-                Size image_roi_size(std::min(x + dft_size.width, image.cols) - x,
-                                    std::min(y + dft_size.height, image.rows) - y);
-                GpuMat image_roi(image_roi_size, CV_32F, (void*)(image.ptr<float>(y) + x),
-                                 image.step);
-                copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows,
-                               0, image_block.cols - image_roi.cols, 0, Scalar(), stream);
-
-                cufftSafeCall(cufftExecR2C(planR2C, image_block.ptr<cufftReal>(),
-                                           image_spect.ptr<cufftComplex>()));
-                mulAndScaleSpectrums(image_spect, templ_spect, result_spect, 0,
-                                     1.f / dft_size.area(), ccorr, stream);
-                cufftSafeCall(cufftExecC2R(planC2R, result_spect.ptr<cufftComplex>(),
-                                           result_data.ptr<cufftReal>()));
-
-                Size result_roi_size(std::min(x + block_size.width, result.cols) - x,
-                                     std::min(y + block_size.height, result.rows) - y);
-                GpuMat result_roi(result_roi_size, result.type(),
-                                  (void*)(result.ptr<float>(y) + x), result.step);
-                GpuMat result_block(result_roi_size, result_data.type(),
-                                    result_data.ptr(), result_data.step);
-
-                if (stream)
-                    stream.enqueueCopy(result_block, result_roi);
-                else
-                    result_block.copyTo(result_roi);
-            }
-        }
-
-        cufftSafeCall(cufftDestroy(planR2C));
-        cufftSafeCall(cufftDestroy(planC2R));
     }
 
+    cufftSafeCall(cufftDestroy(planR2C));
+    cufftSafeCall(cufftDestroy(planC2R));
 #endif
 }
diff --git a/modules/gpu/test/test_filters.cpp b/modules/gpu/test/test_filters.cpp
index d5c668f63..58b473a9e 100644
--- a/modules/gpu/test/test_filters.cpp
+++ b/modules/gpu/test/test_filters.cpp
@@ -629,4 +629,94 @@ INSTANTIATE_TEST_CASE_P(Filter, MorphEx, Combine(
     Values((int)cv::MORPH_OPEN, (int)cv::MORPH_CLOSE, (int)cv::MORPH_GRADIENT, (int)cv::MORPH_TOPHAT, (int)cv::MORPH_BLACKHAT),
     USE_ROI));
 
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// filter2D
+
+PARAM_TEST_CASE(Filter2D, cv::gpu::DeviceInfo, int, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    int ksize;
+    bool useRoi;
+
+    cv::Mat img;
+    cv::Mat kernel;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        ksize = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        img = readImage("stereobp/aloe-L.png");
+        ASSERT_FALSE(img.empty());
+
+        kernel = cv::Mat::ones(ksize, ksize, CV_32FC1);
+    }
+};
+
+TEST_P(Filter2D, Rgba)
+{
+    cv::Mat src;
+    cv::cvtColor(img, src, CV_BGR2BGRA);
+
+    cv::Mat dst_gold;
+    cv::filter2D(src, dst_gold, -1, kernel, cv::Point(-1, -1), 0, cv::BORDER_CONSTANT);
+
+    cv::Mat dst;
+
+    cv::gpu::GpuMat dev_dst;
+
+    cv::gpu::filter2D(loadMat(src, useRoi), dev_dst, -1, kernel);
+
+    dev_dst.download(dst);
+
+    EXPECT_MAT_NEAR_KSIZE(dst_gold, dst, ksize, 0.0);
+}
+
+TEST_P(Filter2D, Gray)
+{
+    cv::Mat src;
+    cv::cvtColor(img, src, CV_BGR2GRAY);
+
+    cv::Mat dst_gold;
+    cv::filter2D(src, dst_gold, -1, kernel, cv::Point(-1, -1), 0, cv::BORDER_CONSTANT);
+
+    cv::Mat dst;
+
+    cv::gpu::GpuMat dev_dst;
+
+    cv::gpu::filter2D(loadMat(src, useRoi), dev_dst, -1, kernel);
+
+    dev_dst.download(dst);
+
+    EXPECT_MAT_NEAR_KSIZE(dst_gold, dst, ksize, 0.0);
+}
+
+TEST_P(Filter2D, 32FC1)
+{
+    cv::Mat src;
+    cv::cvtColor(img, src, CV_BGR2GRAY);
+    src.convertTo(src, CV_32F, 1.0 / 255.0);
+
+    cv::Mat dst_gold;
+    cv::filter2D(src, dst_gold, -1, kernel, cv::Point(-1, -1), 0, cv::BORDER_CONSTANT);
+
+    cv::Mat dst;
+
+    cv::gpu::GpuMat dev_dst;
+
+    cv::gpu::filter2D(loadMat(src, useRoi), dev_dst, -1, kernel);
+
+    dev_dst.download(dst);
+
+    EXPECT_MAT_NEAR_KSIZE(dst_gold, dst, ksize, 1e-3);
+}
+
+INSTANTIATE_TEST_CASE_P(Filter, Filter2D, Combine(
+    ALL_DEVICES,
+    Values(3, 5, 7, 11, 13, 15),
+    USE_ROI));
+
 #endif // HAVE_CUDA
diff --git a/modules/gpu/test/test_imgproc.cpp b/modules/gpu/test/test_imgproc.cpp
index f25cb36ae..018a3e62b 100644
--- a/modules/gpu/test/test_imgproc.cpp
+++ b/modules/gpu/test/test_imgproc.cpp
@@ -2573,36 +2573,36 @@ INSTANTIATE_TEST_CASE_P(ImgProc, EqualizeHist, ALL_DEVICES);
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // cornerHarris
 
-PARAM_TEST_CASE(CornerHarris, cv::gpu::DeviceInfo, MatType, Border)
+PARAM_TEST_CASE(CornerHarris, cv::gpu::DeviceInfo, MatType, Border, int, int)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
     int borderType;
+    int blockSize;
+    int apertureSize;
 
     cv::Mat src;
-    int blockSize;
-    int apertureSize;
     double k;
 
     cv::Mat dst_gold;
-
+
     virtual void SetUp()
     {
         devInfo = GET_PARAM(0);
         type = GET_PARAM(1);
         borderType = GET_PARAM(2);
+        blockSize = GET_PARAM(3);
+        apertureSize = GET_PARAM(4);
 
         cv::gpu::setDevice(devInfo.deviceID());
-
+
         cv::RNG& rng = TS::ptr()->get_rng();
-
+
         cv::Mat img = readImage("stereobm/aloe-L.png", CV_LOAD_IMAGE_GRAYSCALE);
         ASSERT_FALSE(img.empty());
-
+
         img.convertTo(src, type, type == CV_32F ? 1.0 / 255.0 : 1.0);
-
-        blockSize = 1 + rng.next() % 5;
-        apertureSize = 1 + 2 * (rng.next() % 4);
+
         k = rng.uniform(0.1, 0.9);
 
         cv::cornerHarris(src, dst_gold, blockSize, apertureSize, k, borderType);
@@ -2612,7 +2612,7 @@ PARAM_TEST_CASE(CornerHarris, cv::gpu::DeviceInfo, MatType, Border)
 TEST_P(CornerHarris, Accuracy)
 {
     cv::Mat dst;
-
+
     cv::gpu::GpuMat dev_dst;
 
     cv::gpu::cornerHarris(loadMat(src), dev_dst, blockSize, apertureSize, k, borderType);
@@ -2625,21 +2625,23 @@ TEST_P(CornerHarris, Accuracy)
 INSTANTIATE_TEST_CASE_P(ImgProc, CornerHarris, Combine(
     ALL_DEVICES,
     Values(CV_8UC1, CV_32FC1),
-    Values((int) cv::BORDER_REFLECT101, (int) cv::BORDER_REPLICATE, (int) cv::BORDER_REFLECT)));
+    Values((int) cv::BORDER_REFLECT101, (int) cv::BORDER_REPLICATE, (int) cv::BORDER_REFLECT),
+    Values(3, 5, 7),
+    Values(0, 3, 5, 7)));
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // cornerMinEigen
 
-PARAM_TEST_CASE(CornerMinEigen, cv::gpu::DeviceInfo, MatType, Border)
+PARAM_TEST_CASE(CornerMinEigen, cv::gpu::DeviceInfo, MatType, Border, int, int)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
     int borderType;
-
-    cv::Mat src;
     int blockSize;
     int apertureSize;
 
+    cv::Mat src;
+
     cv::Mat dst_gold;
 
     virtual void SetUp()
@@ -2647,18 +2649,17 @@ PARAM_TEST_CASE(CornerMinEigen, cv::gpu::DeviceInfo, MatType, Border)
         devInfo = GET_PARAM(0);
         type = GET_PARAM(1);
         borderType = GET_PARAM(2);
+        blockSize = GET_PARAM(3);
+        apertureSize = GET_PARAM(4);
+
+        cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::gpu::setDevice(devInfo.deviceID());
-
         cv::RNG& rng = TS::ptr()->get_rng();
-
+
         cv::Mat img = readImage("stereobm/aloe-L.png", CV_LOAD_IMAGE_GRAYSCALE);
         ASSERT_FALSE(img.empty());
 
         img.convertTo(src, type, type == CV_32F ? 1.0 / 255.0 : 1.0);
-
-        blockSize = 1 + rng.next() % 5;
-        apertureSize = 1 + 2 * (rng.next() % 4);
 
         cv::cornerMinEigenVal(src, dst_gold, blockSize, apertureSize, borderType);
     }
@@ -2667,7 +2668,7 @@ PARAM_TEST_CASE(CornerMinEigen, cv::gpu::DeviceInfo, MatType, Border)
 TEST_P(CornerMinEigen, Accuracy)
 {
     cv::Mat dst;
-
+
     cv::gpu::GpuMat dev_dst;
 
     cv::gpu::cornerMinEigenVal(loadMat(src), dev_dst, blockSize, apertureSize, borderType);
@@ -2680,7 +2681,9 @@ TEST_P(CornerMinEigen, Accuracy)
 INSTANTIATE_TEST_CASE_P(ImgProc, CornerMinEigen, Combine(
     ALL_DEVICES,
     Values(CV_8UC1, CV_32FC1),
-    Values((int) cv::BORDER_REFLECT101, (int) cv::BORDER_REPLICATE, (int) cv::BORDER_REFLECT)));
+    Values((int) cv::BORDER_REFLECT101, (int) cv::BORDER_REPLICATE, (int) cv::BORDER_REFLECT),
+    Values(3, 5, 7),
+    Values(0, 3, 5, 7)));
 
 ////////////////////////////////////////////////////////////////////////
 // ColumnSum
@@ -3641,12 +3644,54 @@ INSTANTIATE_TEST_CASE_P(ImgProc, Canny, testing::Combine(
 ////////////////////////////////////////////////////////
 // convolve
 
-PARAM_TEST_CASE(Convolve, cv::gpu::DeviceInfo, int)
+namespace
+{
+    void convolveDFT(const cv::Mat& A, const cv::Mat& B, cv::Mat& C, bool ccorr = false)
+    {
+        // reallocate the output array if needed
+        C.create(std::abs(A.rows - B.rows) + 1, std::abs(A.cols - B.cols) + 1, A.type());
+        Size dftSize;
+
+        // compute the size of DFT transform
+        dftSize.width = cv::getOptimalDFTSize(A.cols + B.cols - 1);
+        dftSize.height = cv::getOptimalDFTSize(A.rows + B.rows - 1);
+
+        // allocate temporary buffers and initialize them with 0's
+        cv::Mat tempA(dftSize, A.type(), cv::Scalar::all(0));
+        cv::Mat tempB(dftSize, B.type(), cv::Scalar::all(0));
+
+        // copy A and B to the top-left corners of tempA and tempB, respectively
+        cv::Mat roiA(tempA, cv::Rect(0, 0, A.cols, A.rows));
+        A.copyTo(roiA);
+        cv::Mat roiB(tempB, cv::Rect(0, 0, B.cols, B.rows));
+        B.copyTo(roiB);
+
+        // now transform the padded A & B in-place;
+        // use "nonzeroRows" hint for faster processing
+        cv::dft(tempA, tempA, 0, A.rows);
+        cv::dft(tempB, tempB, 0, B.rows);
+
+        // multiply the spectrums;
+        // the function handles packed spectrum representations well
+        cv::mulSpectrums(tempA, tempB, tempA, 0, ccorr);
+
+        // transform the product back from the frequency domain.
+        // Even though all the result rows will be non-zero,
+        // you need only the first C.rows of them, and thus you
+        // pass nonzeroRows == C.rows
+        cv::dft(tempA, tempA, cv::DFT_INVERSE + cv::DFT_SCALE, C.rows);
+
+        // now copy the result back to C.
+        tempA(cv::Rect(0, 0, C.cols, C.rows)).copyTo(C);
+    }
+}
+
+PARAM_TEST_CASE(Convolve, cv::gpu::DeviceInfo, int, bool)
 {
     cv::gpu::DeviceInfo devInfo;
     int ksize;
+    bool ccorr;
 
-    cv::Size size;
     cv::Mat src;
     cv::Mat kernel;
 
@@ -3656,36 +3701,38 @@ PARAM_TEST_CASE(Convolve, cv::gpu::DeviceInfo, int)
     {
         devInfo = GET_PARAM(0);
         ksize = GET_PARAM(1);
+        ccorr = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
         cv::RNG& rng = TS::ptr()->get_rng();
 
-        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
+        cv::Size size(rng.uniform(200, 400), rng.uniform(200, 400));
 
-        src = randomMat(rng, size, CV_32FC1, 0.0, 255.0, false);
+        src = randomMat(rng, size, CV_32FC1, 0.0, 100.0, false);
         kernel = randomMat(rng, cv::Size(ksize, ksize), CV_32FC1, 0.0, 1.0, false);
 
-        cv::filter2D(src, dst_gold, CV_32F, kernel, cv::Point(-1, -1), 0, cv::BORDER_REPLICATE);
+        convolveDFT(src, kernel, dst_gold, ccorr);
     }
 };
 
 TEST_P(Convolve, Accuracy)
-{
+{
     cv::Mat dst;
 
     cv::gpu::GpuMat d_dst;
 
-    cv::gpu::convolve(loadMat(src), loadMat(kernel), d_dst);
+    cv::gpu::convolve(loadMat(src), loadMat(kernel), d_dst, ccorr);
 
     d_dst.download(dst);
 
-    EXPECT_MAT_NEAR(dst, dst_gold, 1e-2);
+    EXPECT_MAT_NEAR(dst, dst_gold, 1e-1);
 }
 
 INSTANTIATE_TEST_CASE_P(ImgProc, Convolve, Combine(
     ALL_DEVICES,
-    Values(3, 5, 7, 9, 11)));
+    Values(3, 7, 11, 17, 19, 23, 45),
+    Bool()));
 
 #endif // HAVE_CUDA
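Usage sketch (illustrative, not part of the patch): the snippet below shows how the CV_32FC1 filter2D path introduced above might be called from user code, following the gpu::filter2D signature documented in image_filtering.rst and the checks used in test_filters.cpp. The input file name, the 7x7 box kernel, and the tolerance are assumptions for illustration only, and the code presumes an OpenCV 2.4-era build with the gpu module and CUDA available.

// Minimal sketch: run the new CV_32FC1 gpu::filter2D path and compare against the CPU filter2D.
// "lena.png", the kernel size, and the 1e-3 tolerance are placeholder assumptions.
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/gpu/gpu.hpp>

int main()
{
    // Load a grayscale image and convert it to CV_32FC1, the type this change enables.
    cv::Mat img = cv::imread("lena.png", CV_LOAD_IMAGE_GRAYSCALE);
    if (img.empty())
        return -1;

    cv::Mat src32f;
    img.convertTo(src32f, CV_32F, 1.0 / 255.0);

    // Normalized 7x7 box kernel; the new path asserts ksize.width * ksize.height <= 16 * 16.
    const int ksize = 7;
    cv::Mat kernel = cv::Mat::ones(ksize, ksize, CV_32FC1) / float(ksize * ksize);

    // Upload, filter on the GPU, then download the result.
    cv::gpu::GpuMat d_src(src32f), d_dst;
    cv::gpu::filter2D(d_src, d_dst, -1, kernel);

    cv::Mat dst_gpu, dst_cpu;
    d_dst.download(dst_gpu);

    // CPU reference with zero (constant) border, mirroring the border handling used in the tests.
    cv::filter2D(src32f, dst_cpu, -1, kernel, cv::Point(-1, -1), 0, cv::BORDER_CONSTANT);

    double err = cv::norm(dst_gpu, dst_cpu, cv::NORM_INF);
    return err < 1e-3 ? 0 : 1;
}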