From 916703c6e55759cdad8a0c5f4679bb19454e0c3c Mon Sep 17 00:00:00 2001
From: Alexey Spizhevoy <no@email>
Date: Thu, 13 Oct 2011 13:02:41 +0000
Subject: [PATCH] Updated optimal block size estimation in gpu::convolve()

---
 modules/gpu/include/opencv2/gpu/gpu.hpp |  1 +
 modules/gpu/src/imgproc.cpp             | 46 +++++++++-----------
 modules/gpu/test/test_imgproc.cpp       | 56 +++++++++++++++++++++++--
 3 files changed, 73 insertions(+), 30 deletions(-)

diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp
index ee7aae996..da9fcf6ac 100644
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -762,6 +762,7 @@ namespace cv
             ConvolveBuf(Size image_size, Size templ_size) 
                 { create(image_size, templ_size); }
             void create(Size image_size, Size templ_size);
+            void create(Size image_size, Size templ_size, Size block_size); // block_size is a hint; the stored block size is re-derived from the DFT size
 
         private:
             static Size estimateBlockSize(Size result_size, Size templ_size);
diff --git a/modules/gpu/src/imgproc.cpp b/modules/gpu/src/imgproc.cpp
index 35878c437..67a36f0f5 100644
--- a/modules/gpu/src/imgproc.cpp
+++ b/modules/gpu/src/imgproc.cpp
@@ -1520,15 +1520,23 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags)
 //////////////////////////////////////////////////////////////////////////////
 // convolve
 
-
 void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size)
 {
     result_size = Size(image_size.width - templ_size.width + 1,
                        image_size.height - templ_size.height + 1);
-    block_size = estimateBlockSize(result_size, templ_size);
+    create(image_size, templ_size, estimateBlockSize(result_size, templ_size)); // delegate; the overload recomputes result_size
+}
 
-    dft_size.width = getOptimalDFTSize(block_size.width + templ_size.width - 1);
-    dft_size.height = getOptimalDFTSize(block_size.width + templ_size.height - 1);
+// Allocates the buffers for convolving an image_size image with a templ_size template.
+// block_size is only a hint: it selects the power-of-two DFT size, from which the final block size is derived.
+void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size, Size block_size)
+{
+    result_size = Size(image_size.width - templ_size.width + 1,
+                       image_size.height - templ_size.height + 1);
+    this->block_size = block_size;
+    // Round block + template - 1 up to a power of two with integers only: exact, and well defined for sizes <= 1.
+    for (dft_size.width = 1; dft_size.width < block_size.width + templ_size.width - 1; dft_size.width <<= 1) {}
+    for (dft_size.height = 1; dft_size.height < block_size.height + templ_size.height - 1; dft_size.height <<= 1) {}
     createContinuous(dft_size, CV_32F, image_block);
     createContinuous(dft_size, CV_32F, templ_block);
     createContinuous(dft_size, CV_32F, result_data);
@@ -1538,34 +1546,18 @@ void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size)
     createContinuous(1, spect_len, CV_32FC2, templ_spect);
     createContinuous(1, spect_len, CV_32FC2, result_spect);
 
-    block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width);
-    block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height);
+    this->block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width);
+    this->block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height);
 }
 
 
 Size cv::gpu::ConvolveBuf::estimateBlockSize(Size result_size, Size templ_size)
 {
-    int scale = 40;
-    Size bsize_min(512, 512);
-
-    // Check whether we use Fermi generation or newer GPU
-    if (DeviceInfo().majorVersion() >= 2)
-    {
-        bsize_min.width = 1024;
-        bsize_min.height = 1024;
-    }
-
-    Size bsize(std::max(templ_size.width * scale, bsize_min.width),
-               std::max(templ_size.height * scale, bsize_min.height));
-
-    int blocks_per_row = (result_size.width + bsize.width - 1) / bsize.width;
-    int blocks_per_col = (result_size.height + bsize.height - 1) / bsize.height;
-    bsize.width = (result_size.width + blocks_per_row - 1) / blocks_per_row;
-    bsize.height = (result_size.height + blocks_per_col - 1) / blocks_per_col;
-
-    bsize.width = std::min(bsize.width, result_size.width);
-    bsize.height = std::min(bsize.height, result_size.height);
-    return bsize;
+    // Initial estimate: a third of each result extent (rounded up), capped at the extent; templ_size is unused.
+    const int third_w = (result_size.width + 2) / 3;
+    const int third_h = (result_size.height + 2) / 3;
+    return Size(std::min(third_w, result_size.width),
+                std::min(third_h, result_size.height));
 }
 
 
diff --git a/modules/gpu/test/test_imgproc.cpp b/modules/gpu/test/test_imgproc.cpp
index 1ff6cf258..3e8abf865 100644
--- a/modules/gpu/test/test_imgproc.cpp
+++ b/modules/gpu/test/test_imgproc.cpp
@@ -3704,8 +3704,9 @@ TEST_P(MatchTemplate_CCOEF_NORMED, Accuracy)
 
     cv::Mat dstGold;
     cv::matchTemplate(image, pattern, dstGold, CV_TM_CCOEFF_NORMED);
+    double minValGold, maxValGold;
     cv::Point minLocGold, maxLocGold;
-    cv::minMaxLoc(dstGold, NULL, NULL, &minLocGold, &maxLocGold);
+    cv::minMaxLoc(dstGold, &minValGold, &maxValGold, &minLocGold, &maxLocGold);
 
     cv::Mat dst;
     ASSERT_NO_THROW(
@@ -3727,8 +3728,57 @@ TEST_P(MatchTemplate_CCOEF_NORMED, Accuracy)
 
 INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_CCOEF_NORMED, testing::Combine(
                         testing::ValuesIn(devices()),
-                        testing::Values(std::make_pair(std::string("matchtemplate/source-0.png"), std::string("matchtemplate/target-0.png")),
-                                        std::make_pair(std::string("matchtemplate/source-1.png"), std::string("matchtemplate/target-1.png")))));
+                        testing::Values(std::make_pair(std::string("matchtemplate/source-0.png"), std::string("matchtemplate/target-0.png")))));
+// The source-1/target-1 pair is instantiated for MatchTemplate_CCOEF_NORMED_NoThrow, not for this accuracy test.
+
+struct MatchTemplate_CCOEF_NORMED_NoThrow : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, std::pair<std::string, std::string> > >
+{
+    // Fixture: selects the parametrized device and loads the named image / pattern pair.
+    cv::gpu::DeviceInfo devInfo;
+    std::string imageName;
+    std::string patternName;
+    cv::Mat image, pattern; // host images loaded by SetUp()
+
+    virtual void SetUp()
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        imageName = std::tr1::get<1>(GetParam()).first;
+        patternName = std::tr1::get<1>(GetParam()).second;
+        cv::gpu::setDevice(devInfo.deviceID()); // run on the device this instance is parametrized for
+
+        image = readImage(imageName);
+        ASSERT_FALSE(image.empty());
+        pattern = readImage(patternName);
+        ASSERT_FALSE(pattern.empty());
+    }
+};
+
+TEST_P(MatchTemplate_CCOEF_NORMED_NoThrow, NoThrow)
+{
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(imageName);
+    PRINT_PARAM(patternName);
+
+    // CPU reference: used only for the expected result dimensions; score values are not compared here.
+    cv::Mat dstGold;
+    cv::matchTemplate(image, pattern, dstGold, CV_TM_CCOEFF_NORMED);
+
+    // Upload, match and download must all complete without throwing.
+    cv::Mat dst;
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst;
+        cv::gpu::matchTemplate(cv::gpu::GpuMat(image), cv::gpu::GpuMat(pattern), dev_dst, CV_TM_CCOEFF_NORMED);
+        dev_dst.download(dst);
+    );
+    ASSERT_EQ(dstGold.rows, dst.rows);
+    ASSERT_EQ(dstGold.cols, dst.cols);
+}
+
+// Run the no-throw check on every device returned by devices() with the source-1/target-1 image pair.
+INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_CCOEF_NORMED_NoThrow, testing::Combine(
+                        testing::ValuesIn(devices()),
+                        testing::Values(std::make_pair(std::string("matchtemplate/source-1.png"), std::string("matchtemplate/target-1.png")))));
+
 
 
 ////////////////////////////////////////////////////////////////////////////