Moved crossCorr (as an NPP_Staging wrapper) from the internal matchTemplate files into the public GPU module

This commit is contained in:
Alexey Spizhevoy
2010-12-22 08:56:16 +00:00
parent f9bcef9003
commit fef06c25b5
5 changed files with 166 additions and 141 deletions

View File

@@ -40,6 +40,7 @@
//
//M*/
#include <cufft.h>
#include "internal_shared.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
@@ -749,5 +750,32 @@ namespace cv { namespace gpu { namespace imgproc
cudaSafeCall(cudaThreadSynchronize());
}
//////////////////////////////////////////////////////////////////////////
// multiplyAndNormalizeSpects
// Computes c[i] = scale * a[i] * conj(b[i]) for each of the n complex samples.
// One thread per element; 1D grid/block layout expected.
__global__ void multiplyAndNormalizeSpectsKernel(
        int n, float scale, const cufftComplex* a,
        const cufftComplex* b, cufftComplex* c)
{
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= n)
        return; // tail threads past the end of the data do nothing

    const cufftComplex prod = cuCmulf(a[tid], cuConjf(b[tid]));
    c[tid] = make_cuFloatComplex(scale * cuCrealf(prod), scale * cuCimagf(prod));
}
// Performs per-element multiplication and normalization of two spectrums:
// c[i] = scale * a[i] * conj(b[i]) for i in [0, n).
// Launches a 1D grid of 256-thread blocks on the default stream and blocks
// the host until the kernel completes (via cudaThreadSynchronize).
void multiplyAndNormalizeSpects(int n, float scale, const cufftComplex* a,
        const cufftComplex* b, cufftComplex* c)
{
    // Guard: divUp(0, 256) == 0 would produce a zero-dimension grid, which
    // is an invalid launch configuration. Nothing to do for empty input.
    if (n <= 0)
        return;

    dim3 threads(256);
    dim3 grid(divUp(n, threads.x));

    multiplyAndNormalizeSpectsKernel<<<grid, threads>>>(n, scale, a, b, c);
    cudaSafeCall(cudaThreadSynchronize());
}
}}}

View File

@@ -40,7 +40,6 @@
//
//M*/
#include <cufft.h>
#include "internal_shared.hpp"
#include "opencv2/gpu/device/vecmath.hpp"
@@ -256,29 +255,6 @@ void matchTemplateNaive_SQDIFF_8U(const DevMem2D image, const DevMem2D templ,
}
// Per-element spectrum product with conjugation and scaling:
// writes c[i] = scale * a[i] * conj(b[i]). One thread handles one element.
__global__ void multiplyAndNormalizeSpectsKernel(
        int n, float scale, const cufftComplex* a,
        const cufftComplex* b, cufftComplex* c)
{
    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
    if (idx < n)
    {
        const cufftComplex ab = cuCmulf(a[idx], cuConjf(b[idx]));
        c[idx] = make_cuFloatComplex(cuCrealf(ab) * scale, cuCimagf(ab) * scale);
    }
}
// Host wrapper: multiplies spectrum a by the conjugate of spectrum b,
// scaling each product by `scale`, into c (n complex elements each).
// Runs on the default stream and synchronizes before returning.
void multiplyAndNormalizeSpects(int n, float scale, const cufftComplex* a,
        const cufftComplex* b, cufftComplex* c)
{
    // divUp(n, 256) is 0 for n <= 0, and a zero-dimension grid is an
    // invalid launch configuration — bail out early on empty input.
    if (n <= 0)
        return;

    dim3 threads(256);
    dim3 grid(divUp(n, threads.x));

    multiplyAndNormalizeSpectsKernel<<<grid, threads>>>(n, scale, a, b, c);
    cudaSafeCall(cudaThreadSynchronize());
}
template <int cn>
__global__ void matchTemplatePreparedKernel_SQDIFF_8U(
int w, int h, const PtrStep_<unsigned long long> image_sqsum,