Moved crossCorr (as an NPP_Staging wrapper) from the internal matchTemplate files into the public GPU module

This commit is contained in:
Alexey Spizhevoy
2010-12-22 08:56:16 +00:00
parent f9bcef9003
commit fef06c25b5
5 changed files with 166 additions and 141 deletions

View File

@@ -40,6 +40,7 @@
//
//M*/
#include <cufft.h>
#include "internal_shared.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
@@ -749,5 +750,32 @@ namespace cv { namespace gpu { namespace imgproc
cudaSafeCall(cudaThreadSynchronize());
}
//////////////////////////////////////////////////////////////////////////
// multiplyAndNormalizeSpects
// Computes c[i] = scale * a[i] * conj(b[i]) for each of the n complex samples.
// One thread per element; 1D grid/block layout expected.
__global__ void multiplyAndNormalizeSpectsKernel(
        int n, float scale, const cufftComplex* a,
        const cufftComplex* b, cufftComplex* c)
{
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= n)
        return; // tail threads past the end of the data do nothing

    const cufftComplex prod = cuCmulf(a[tid], cuConjf(b[tid]));
    c[tid] = make_cuFloatComplex(scale * cuCrealf(prod), scale * cuCimagf(prod));
}
// Performs per-element multiplication and normalization of two spectrums:
// c[i] = scale * a[i] * conj(b[i]) for i in [0, n).
// Launches a 1D grid of 256-thread blocks on the default stream and blocks
// the host until the kernel completes (via cudaThreadSynchronize).
void multiplyAndNormalizeSpects(int n, float scale, const cufftComplex* a,
        const cufftComplex* b, cufftComplex* c)
{
    // Guard: divUp(0, 256) == 0 would produce a zero-dimension grid, which
    // is an invalid launch configuration. Nothing to do for empty input.
    if (n <= 0)
        return;

    dim3 threads(256);
    dim3 grid(divUp(n, threads.x));

    multiplyAndNormalizeSpectsKernel<<<grid, threads>>>(n, scale, a, b, c);
    cudaSafeCall(cudaThreadSynchronize());
}
}}}

View File

@@ -40,7 +40,6 @@
//
//M*/
#include <cufft.h>
#include "internal_shared.hpp"
#include "opencv2/gpu/device/vecmath.hpp"
@@ -256,29 +255,6 @@ void matchTemplateNaive_SQDIFF_8U(const DevMem2D image, const DevMem2D templ,
}
// Per-element spectrum product with conjugation and scaling:
// writes c[i] = scale * a[i] * conj(b[i]). One thread handles one element.
__global__ void multiplyAndNormalizeSpectsKernel(
        int n, float scale, const cufftComplex* a,
        const cufftComplex* b, cufftComplex* c)
{
    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
    if (idx < n)
    {
        const cufftComplex ab = cuCmulf(a[idx], cuConjf(b[idx]));
        c[idx] = make_cuFloatComplex(cuCrealf(ab) * scale, cuCimagf(ab) * scale);
    }
}
// Host wrapper: multiplies spectrum a by the conjugate of spectrum b,
// scaling each product by `scale`, into c (n complex elements each).
// Runs on the default stream and synchronizes before returning.
void multiplyAndNormalizeSpects(int n, float scale, const cufftComplex* a,
        const cufftComplex* b, cufftComplex* c)
{
    // divUp(n, 256) is 0 for n <= 0, and a zero-dimension grid is an
    // invalid launch configuration — bail out early on empty input.
    if (n <= 0)
        return;

    dim3 threads(256);
    dim3 grid(divUp(n, threads.x));

    multiplyAndNormalizeSpectsKernel<<<grid, threads>>>(n, scale, a, b, c);
    cudaSafeCall(cudaThreadSynchronize());
}
template <int cn>
__global__ void matchTemplatePreparedKernel_SQDIFF_8U(
int w, int h, const PtrStep_<unsigned long long> image_sqsum,