added support of scaling into gpu::dft, refactored gpu::convolve
This commit is contained in:
@@ -752,7 +752,6 @@ namespace cv { namespace gpu { namespace imgproc
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// mulSpectrums
|
||||
|
||||
|
||||
__global__ void mulSpectrumsKernel(const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,
|
||||
DevMem2D_<cufftComplex> c)
|
||||
{
|
||||
@@ -776,11 +775,9 @@ namespace cv { namespace gpu { namespace imgproc
|
||||
cudaSafeCall(cudaThreadSynchronize());
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// mulSpectrums_CONJ
|
||||
|
||||
|
||||
__global__ void mulSpectrumsKernel_CONJ(
|
||||
const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,
|
||||
DevMem2D_<cufftComplex> c)
|
||||
@@ -805,11 +802,9 @@ namespace cv { namespace gpu { namespace imgproc
|
||||
cudaSafeCall(cudaThreadSynchronize());
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// mulAndScaleSpectrums
|
||||
|
||||
|
||||
__global__ void mulAndScaleSpectrumsKernel(
|
||||
const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,
|
||||
float scale, DevMem2D_<cufftComplex> c)
|
||||
@@ -835,11 +830,9 @@ namespace cv { namespace gpu { namespace imgproc
|
||||
cudaSafeCall(cudaThreadSynchronize());
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// mulAndScaleSpectrums_CONJ
|
||||
|
||||
|
||||
__global__ void mulAndScaleSpectrumsKernel_CONJ(
|
||||
const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,
|
||||
float scale, DevMem2D_<cufftComplex> c)
|
||||
@@ -865,6 +858,5 @@ namespace cv { namespace gpu { namespace imgproc
|
||||
cudaSafeCall(cudaThreadSynchronize());
|
||||
}
|
||||
|
||||
|
||||
}}}
|
||||
|
||||
|
@@ -1144,9 +1144,6 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, int flags, int nonZeroRows, bo
|
||||
bool is_complex_input = src.channels() == 2;
|
||||
bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
|
||||
|
||||
// We don't support scaled transform
|
||||
CV_Assert(!is_scaled_dft);
|
||||
|
||||
// We don't support real-to-real transform
|
||||
CV_Assert(is_complex_input || is_complex_output);
|
||||
|
||||
@@ -1178,6 +1175,7 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, int flags, int nonZeroRows, bo
|
||||
if (is_complex_input)
|
||||
dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R;
|
||||
|
||||
int dft_rows = src_aux.rows;
|
||||
int dft_cols = src_aux.cols;
|
||||
if (is_complex_input && !is_complex_output)
|
||||
dft_cols = (src_aux.cols - 1) * 2 + (int)odd;
|
||||
@@ -1185,9 +1183,9 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, int flags, int nonZeroRows, bo
|
||||
|
||||
cufftHandle plan;
|
||||
if (is_1d_input || is_row_dft)
|
||||
cufftPlan1d(&plan, dft_cols, dft_type, src_aux.rows);
|
||||
cufftPlan1d(&plan, dft_cols, dft_type, dft_rows);
|
||||
else
|
||||
cufftPlan2d(&plan, src_aux.rows, dft_cols, dft_type);
|
||||
cufftPlan2d(&plan, dft_rows, dft_cols, dft_type);
|
||||
|
||||
GpuMat dst_data, dst_aux;
|
||||
int dst_cols, dst_rows;
|
||||
@@ -1285,6 +1283,9 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, int flags, int nonZeroRows, bo
|
||||
}
|
||||
|
||||
cufftSafeCall(cufftDestroy(plan));
|
||||
|
||||
if (is_scaled_dft)
|
||||
multiply(dst, Scalar::all(1. / (dft_rows * dft_cols)), dst);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
@@ -1293,7 +1294,7 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, int flags, int nonZeroRows, bo
|
||||
namespace
|
||||
{
|
||||
// Estimates optimal block size
|
||||
void crossCorrOptBlockSize(int w, int h, int tw, int th, int& bw, int& bh)
|
||||
void convolveOptBlockSize(int w, int h, int tw, int th, int& bw, int& bh)
|
||||
{
|
||||
int major, minor;
|
||||
getComputeCapability(getDevice(), major, minor);
|
||||
@@ -1329,7 +1330,7 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
|
||||
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
|
||||
|
||||
Size block_size;
|
||||
crossCorrOptBlockSize(result.cols, result.rows, templ.cols, templ.rows,
|
||||
convolveOptBlockSize(result.cols, result.rows, templ.cols, templ.rows,
|
||||
block_size.width, block_size.height);
|
||||
|
||||
Size dft_size;
|
||||
@@ -1367,10 +1368,11 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
|
||||
{
|
||||
for (int x = 0; x < result.cols; x += block_size.width)
|
||||
{
|
||||
// Locate ROI in the source matrix
|
||||
Size image_roi_size;
|
||||
image_roi_size.width = std::min(x + dft_size.width, image.cols) - x;
|
||||
image_roi_size.height = std::min(y + dft_size.height, image.rows) - y;
|
||||
|
||||
// Locate ROI in the source matrix
|
||||
GpuMat image_roi(image_roi_size, CV_32F, (void*)(image.ptr<float>(y) + x), image.step);
|
||||
|
||||
// Make source image block continous
|
||||
@@ -1386,14 +1388,16 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
|
||||
cufftSafeCall(cufftExecC2R(planC2R, result_spect.ptr<cufftComplex>(),
|
||||
result_data.ptr<cufftReal>()));
|
||||
|
||||
// Copy result block into appropriate part of the result matrix.
|
||||
// We can't compute it inplace as the result of the CUFFT transforms
|
||||
// is always continous, while the result matrix and its blocks can have gaps.
|
||||
Size result_roi_size;
|
||||
result_roi_size.width = std::min(x + block_size.width, result.cols) - x;
|
||||
result_roi_size.height = std::min(y + block_size.height, result.rows) - y;
|
||||
|
||||
GpuMat result_roi(result_roi_size, CV_32F, (void*)(result.ptr<float>(y) + x), result.step);
|
||||
GpuMat result_block(result_roi_size, CV_32F, result_data.ptr(), dft_size.width * sizeof(cufftReal));
|
||||
|
||||
// Copy result block into appropriate part of the result matrix.
|
||||
// We can't compute it inplace as the result of the CUFFT transforms
|
||||
// is always continous, while the result matrix and its blocks can have gaps.
|
||||
result_block.copyTo(result_roi);
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user