From 916703c6e55759cdad8a0c5f4679bb19454e0c3c Mon Sep 17 00:00:00 2001 From: Alexey Spizhevoy Date: Thu, 13 Oct 2011 13:02:41 +0000 Subject: [PATCH] Updated optimal block size estimation in gpu::convolve() --- modules/gpu/include/opencv2/gpu/gpu.hpp | 1 + modules/gpu/src/imgproc.cpp | 46 +++++++++----------- modules/gpu/test/test_imgproc.cpp | 56 +++++++++++++++++++++++-- 3 files changed, 73 insertions(+), 30 deletions(-) diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index ee7aae996..da9fcf6ac 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -762,6 +762,7 @@ namespace cv ConvolveBuf(Size image_size, Size templ_size) { create(image_size, templ_size); } void create(Size image_size, Size templ_size); + void create(Size image_size, Size templ_size, Size block_size); private: static Size estimateBlockSize(Size result_size, Size templ_size); diff --git a/modules/gpu/src/imgproc.cpp b/modules/gpu/src/imgproc.cpp index 35878c437..67a36f0f5 100644 --- a/modules/gpu/src/imgproc.cpp +++ b/modules/gpu/src/imgproc.cpp @@ -1520,15 +1520,23 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags) ////////////////////////////////////////////////////////////////////////////// // convolve - void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size) { result_size = Size(image_size.width - templ_size.width + 1, image_size.height - templ_size.height + 1); - block_size = estimateBlockSize(result_size, templ_size); + create(image_size, templ_size, estimateBlockSize(result_size, templ_size)); +} - dft_size.width = getOptimalDFTSize(block_size.width + templ_size.width - 1); - dft_size.height = getOptimalDFTSize(block_size.width + templ_size.height - 1); + +void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size, Size block_size) +{ + result_size = Size(image_size.width - templ_size.width + 1, + image_size.height - templ_size.height + 1); + + 
this->block_size = block_size; + + dft_size.width = 1 << int(ceil(std::log(block_size.width + templ_size.width - 1.) / std::log(2.))); + dft_size.height = 1 << int(ceil(std::log(block_size.height + templ_size.height - 1.) / std::log(2.))); createContinuous(dft_size, CV_32F, image_block); createContinuous(dft_size, CV_32F, templ_block); createContinuous(dft_size, CV_32F, result_data); @@ -1538,34 +1546,18 @@ void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size) createContinuous(1, spect_len, CV_32FC2, templ_spect); createContinuous(1, spect_len, CV_32FC2, result_spect); - block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width); - block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height); + this->block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width); + this->block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height); } Size cv::gpu::ConvolveBuf::estimateBlockSize(Size result_size, Size templ_size) { - int scale = 40; - Size bsize_min(512, 512); - - // Check whether we use Fermi generation or newer GPU - if (DeviceInfo().majorVersion() >= 2) - { - bsize_min.width = 1024; - bsize_min.height = 1024; - } - - Size bsize(std::max(templ_size.width * scale, bsize_min.width), - std::max(templ_size.height * scale, bsize_min.height)); - - int blocks_per_row = (result_size.width + bsize.width - 1) / bsize.width; - int blocks_per_col = (result_size.height + bsize.height - 1) / bsize.height; - bsize.width = (result_size.width + blocks_per_row - 1) / blocks_per_row; - bsize.height = (result_size.height + blocks_per_col - 1) / blocks_per_col; - - bsize.width = std::min(bsize.width, result_size.width); - bsize.height = std::min(bsize.height, result_size.height); - return bsize; + int width = (result_size.width + 2) / 3; + int height = (result_size.height + 2) / 3; + width = std::min(width, result_size.width); + height = std::min(height, 
result_size.height); + return Size(width, height); } diff --git a/modules/gpu/test/test_imgproc.cpp b/modules/gpu/test/test_imgproc.cpp index 1ff6cf258..3e8abf865 100644 --- a/modules/gpu/test/test_imgproc.cpp +++ b/modules/gpu/test/test_imgproc.cpp @@ -3704,8 +3704,9 @@ TEST_P(MatchTemplate_CCOEF_NORMED, Accuracy) cv::Mat dstGold; cv::matchTemplate(image, pattern, dstGold, CV_TM_CCOEFF_NORMED); + double minValGold, maxValGold; cv::Point minLocGold, maxLocGold; - cv::minMaxLoc(dstGold, NULL, NULL, &minLocGold, &maxLocGold); + cv::minMaxLoc(dstGold, &minValGold, &maxValGold, &minLocGold, &maxLocGold); cv::Mat dst; ASSERT_NO_THROW( @@ -3727,8 +3728,57 @@ TEST_P(MatchTemplate_CCOEF_NORMED, Accuracy) INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_CCOEF_NORMED, testing::Combine( testing::ValuesIn(devices()), - testing::Values(std::make_pair(std::string("matchtemplate/source-0.png"), std::string("matchtemplate/target-0.png")), - std::make_pair(std::string("matchtemplate/source-1.png"), std::string("matchtemplate/target-1.png"))))); + testing::Values(std::make_pair(std::string("matchtemplate/source-0.png"), std::string("matchtemplate/target-0.png"))))); + + +struct MatchTemplate_CCOEF_NORMED_NoThrow : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, std::pair<std::string, std::string> > > +{ + cv::gpu::DeviceInfo devInfo; + std::string imageName; + std::string patternName; + + cv::Mat image, pattern; + + virtual void SetUp() + { + devInfo = std::tr1::get<0>(GetParam()); + imageName = std::tr1::get<1>(GetParam()).first; + patternName = std::tr1::get<1>(GetParam()).second; + + image = readImage(imageName); + ASSERT_FALSE(image.empty()); + + pattern = readImage(patternName); + ASSERT_FALSE(pattern.empty()); + } +}; + +TEST_P(MatchTemplate_CCOEF_NORMED_NoThrow, NoThrow) +{ + PRINT_PARAM(devInfo); + PRINT_PARAM(imageName); + PRINT_PARAM(patternName); + + cv::Mat dstGold; + cv::matchTemplate(image, pattern, dstGold, CV_TM_CCOEFF_NORMED); + double minValGold, maxValGold; + cv::Point minLocGold, maxLocGold; + 
cv::minMaxLoc(dstGold, &minValGold, &maxValGold, &minLocGold, &maxLocGold); + + cv::Mat dst; + ASSERT_NO_THROW( + cv::gpu::GpuMat dev_dst; + cv::gpu::matchTemplate(cv::gpu::GpuMat(image), cv::gpu::GpuMat(pattern), dev_dst, CV_TM_CCOEFF_NORMED); + dev_dst.download(dst); + ); + +} + + +INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_CCOEF_NORMED_NoThrow, testing::Combine( + testing::ValuesIn(devices()), + testing::Values(std::make_pair(std::string("matchtemplate/source-1.png"), std::string("matchtemplate/target-1.png"))))); + ////////////////////////////////////////////////////////////////////////////