added support of scaling into gpu::dft, refactored gpu::convolve

This commit is contained in:
Alexey Spizhevoy 2010-12-24 06:48:23 +00:00
parent 2026649f73
commit 6702d55711
3 changed files with 19 additions and 23 deletions

View File

@ -752,7 +752,6 @@ namespace cv { namespace gpu { namespace imgproc
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// mulSpectrums // mulSpectrums
__global__ void mulSpectrumsKernel(const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b, __global__ void mulSpectrumsKernel(const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,
DevMem2D_<cufftComplex> c) DevMem2D_<cufftComplex> c)
{ {
@ -776,11 +775,9 @@ namespace cv { namespace gpu { namespace imgproc
cudaSafeCall(cudaThreadSynchronize()); cudaSafeCall(cudaThreadSynchronize());
} }
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// mulSpectrums_CONJ // mulSpectrums_CONJ
__global__ void mulSpectrumsKernel_CONJ( __global__ void mulSpectrumsKernel_CONJ(
const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b, const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,
DevMem2D_<cufftComplex> c) DevMem2D_<cufftComplex> c)
@ -805,11 +802,9 @@ namespace cv { namespace gpu { namespace imgproc
cudaSafeCall(cudaThreadSynchronize()); cudaSafeCall(cudaThreadSynchronize());
} }
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// mulAndScaleSpectrums // mulAndScaleSpectrums
__global__ void mulAndScaleSpectrumsKernel( __global__ void mulAndScaleSpectrumsKernel(
const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b, const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,
float scale, DevMem2D_<cufftComplex> c) float scale, DevMem2D_<cufftComplex> c)
@ -835,11 +830,9 @@ namespace cv { namespace gpu { namespace imgproc
cudaSafeCall(cudaThreadSynchronize()); cudaSafeCall(cudaThreadSynchronize());
} }
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// mulAndScaleSpectrums_CONJ // mulAndScaleSpectrums_CONJ
__global__ void mulAndScaleSpectrumsKernel_CONJ( __global__ void mulAndScaleSpectrumsKernel_CONJ(
const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b, const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,
float scale, DevMem2D_<cufftComplex> c) float scale, DevMem2D_<cufftComplex> c)
@ -865,6 +858,5 @@ namespace cv { namespace gpu { namespace imgproc
cudaSafeCall(cudaThreadSynchronize()); cudaSafeCall(cudaThreadSynchronize());
} }
}}} }}}

View File

@ -1144,9 +1144,6 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, int flags, int nonZeroRows, bo
bool is_complex_input = src.channels() == 2; bool is_complex_input = src.channels() == 2;
bool is_complex_output = !(flags & DFT_REAL_OUTPUT); bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
// We don't support scaled transform
CV_Assert(!is_scaled_dft);
// We don't support real-to-real transform // We don't support real-to-real transform
CV_Assert(is_complex_input || is_complex_output); CV_Assert(is_complex_input || is_complex_output);
@ -1178,6 +1175,7 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, int flags, int nonZeroRows, bo
if (is_complex_input) if (is_complex_input)
dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R; dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R;
int dft_rows = src_aux.rows;
int dft_cols = src_aux.cols; int dft_cols = src_aux.cols;
if (is_complex_input && !is_complex_output) if (is_complex_input && !is_complex_output)
dft_cols = (src_aux.cols - 1) * 2 + (int)odd; dft_cols = (src_aux.cols - 1) * 2 + (int)odd;
@ -1185,9 +1183,9 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, int flags, int nonZeroRows, bo
cufftHandle plan; cufftHandle plan;
if (is_1d_input || is_row_dft) if (is_1d_input || is_row_dft)
cufftPlan1d(&plan, dft_cols, dft_type, src_aux.rows); cufftPlan1d(&plan, dft_cols, dft_type, dft_rows);
else else
cufftPlan2d(&plan, src_aux.rows, dft_cols, dft_type); cufftPlan2d(&plan, dft_rows, dft_cols, dft_type);
GpuMat dst_data, dst_aux; GpuMat dst_data, dst_aux;
int dst_cols, dst_rows; int dst_cols, dst_rows;
@ -1285,6 +1283,9 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, int flags, int nonZeroRows, bo
} }
cufftSafeCall(cufftDestroy(plan)); cufftSafeCall(cufftDestroy(plan));
if (is_scaled_dft)
multiply(dst, Scalar::all(1. / (dft_rows * dft_cols)), dst);
} }
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
@ -1293,7 +1294,7 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, int flags, int nonZeroRows, bo
namespace namespace
{ {
// Estimates optimal block size // Estimates optimal block size
void crossCorrOptBlockSize(int w, int h, int tw, int th, int& bw, int& bh) void convolveOptBlockSize(int w, int h, int tw, int th, int& bw, int& bh)
{ {
int major, minor; int major, minor;
getComputeCapability(getDevice(), major, minor); getComputeCapability(getDevice(), major, minor);
@ -1329,7 +1330,7 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F); result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
Size block_size; Size block_size;
crossCorrOptBlockSize(result.cols, result.rows, templ.cols, templ.rows, convolveOptBlockSize(result.cols, result.rows, templ.cols, templ.rows,
block_size.width, block_size.height); block_size.width, block_size.height);
Size dft_size; Size dft_size;
@ -1367,10 +1368,11 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
{ {
for (int x = 0; x < result.cols; x += block_size.width) for (int x = 0; x < result.cols; x += block_size.width)
{ {
// Locate ROI in the source matrix
Size image_roi_size; Size image_roi_size;
image_roi_size.width = std::min(x + dft_size.width, image.cols) - x; image_roi_size.width = std::min(x + dft_size.width, image.cols) - x;
image_roi_size.height = std::min(y + dft_size.height, image.rows) - y; image_roi_size.height = std::min(y + dft_size.height, image.rows) - y;
// Locate ROI in the source matrix
GpuMat image_roi(image_roi_size, CV_32F, (void*)(image.ptr<float>(y) + x), image.step); GpuMat image_roi(image_roi_size, CV_32F, (void*)(image.ptr<float>(y) + x), image.step);
// Make source image block continous // Make source image block continous
@ -1386,14 +1388,16 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
cufftSafeCall(cufftExecC2R(planC2R, result_spect.ptr<cufftComplex>(), cufftSafeCall(cufftExecC2R(planC2R, result_spect.ptr<cufftComplex>(),
result_data.ptr<cufftReal>())); result_data.ptr<cufftReal>()));
// Copy result block into appropriate part of the result matrix.
// We can't compute it inplace as the result of the CUFFT transforms
// is always continous, while the result matrix and its blocks can have gaps.
Size result_roi_size; Size result_roi_size;
result_roi_size.width = std::min(x + block_size.width, result.cols) - x; result_roi_size.width = std::min(x + block_size.width, result.cols) - x;
result_roi_size.height = std::min(y + block_size.height, result.rows) - y; result_roi_size.height = std::min(y + block_size.height, result.rows) - y;
GpuMat result_roi(result_roi_size, CV_32F, (void*)(result.ptr<float>(y) + x), result.step); GpuMat result_roi(result_roi_size, CV_32F, (void*)(result.ptr<float>(y) + x), result.step);
GpuMat result_block(result_roi_size, CV_32F, result_data.ptr(), dft_size.width * sizeof(cufftReal)); GpuMat result_block(result_roi_size, CV_32F, result_data.ptr(), dft_size.width * sizeof(cufftReal));
// Copy result block into appropriate part of the result matrix.
// We can't compute it inplace as the result of the CUFFT transforms
// is always continous, while the result matrix and its blocks can have gaps.
result_block.copyTo(result_roi); result_block.copyTo(result_roi);
} }
} }

View File

@ -274,7 +274,7 @@ struct CV_GpuDftTest: CvTest
rng.fill(mat, RNG::UNIFORM, Scalar::all(0.f), Scalar::all(10.f)); rng.fill(mat, RNG::UNIFORM, Scalar::all(0.f), Scalar::all(10.f));
} }
bool cmp(const Mat& gold, const Mat& mine, float max_err=1e-3f, float scale=1.f) bool cmp(const Mat& gold, const Mat& mine, float max_err=1e-3f)
{ {
if (gold.size() != mine.size()) if (gold.size() != mine.size())
{ {
@ -299,7 +299,7 @@ struct CV_GpuDftTest: CvTest
for (int j = 0; j < gold.cols * gold.channels(); ++j) for (int j = 0; j < gold.cols * gold.channels(); ++j)
{ {
float gold_ = gold.at<float>(i, j); float gold_ = gold.at<float>(i, j);
float mine_ = mine.at<float>(i, j) * scale; float mine_ = mine.at<float>(i, j);
if (fabs(gold_ - mine_) > max_err) if (fabs(gold_ - mine_) > max_err)
{ {
ts->printf(CvTS::CONSOLE, "bad values at %d %d: gold=%f, mine=%f\n", j / gold.channels(), i, gold_, mine_); ts->printf(CvTS::CONSOLE, "bad values at %d %d: gold=%f, mine=%f\n", j / gold.channels(), i, gold_, mine_);
@ -382,7 +382,7 @@ struct CV_GpuDftTest: CvTest
d_c = GpuMat(a.rows, a.cols, CV_32F, d_c_data.ptr(), a.cols * d_c_data.elemSize()); d_c = GpuMat(a.rows, a.cols, CV_32F, d_c_data.ptr(), a.cols * d_c_data.elemSize());
} }
dft(GpuMat(a), d_b, 0); dft(GpuMat(a), d_b, 0);
dft(d_b, d_c, DFT_REAL_OUTPUT, 0, odd); dft(d_b, d_c, DFT_REAL_OUTPUT | DFT_SCALE, 0, odd);
if (ok && inplace && d_b.ptr() != d_b_data.ptr()) if (ok && inplace && d_b.ptr() != d_b_data.ptr())
{ {
@ -408,7 +408,7 @@ struct CV_GpuDftTest: CvTest
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT); ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
ok = false; ok = false;
} }
if (ok) ok = cmp(a, Mat(d_c), rows * cols * 1e-5f, 1.f / (rows * cols)); if (ok) ok = cmp(a, Mat(d_c), rows * cols * 1e-5f);
if (!ok) if (!ok)
ts->printf(CvTS::CONSOLE, "testR2CThenC2R failed: hint=%s, cols=%d, rows=%d\n", hint.c_str(), cols, rows); ts->printf(CvTS::CONSOLE, "testR2CThenC2R failed: hint=%s, cols=%d, rows=%d\n", hint.c_str(), cols, rows);
} }