added gpu::dft implemented via CUFFT
This commit is contained in:
parent
da1fb6c50a
commit
09735fd208
@ -628,15 +628,28 @@ namespace cv
|
||||
//! computes minimum eigen value of 2x2 derivative covariation matrix at each pixel - the cornerness criteria
|
||||
CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType=BORDER_REFLECT101);
|
||||
|
||||
//! performs per-element multiplication of two full (i.e. not packed) Fourier spectrums
|
||||
//! supports only 32FC2 matrixes (interleaved format)
|
||||
//! performs per-element multiplication of two full (not packed) Fourier spectrums
|
||||
//! supports 32FC2 matrixes only (interleaved format)
|
||||
CV_EXPORTS void mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB=false);
|
||||
|
||||
//! performs per-element multiplication of two full (i.e. not packed) Fourier spectrums
|
||||
//! supports only 32FC2 matrixes (interleaved format)
|
||||
//! performs per-element multiplication of two full (not packed) Fourier spectrums
|
||||
//! supports 32FC2 matrixes only (interleaved format)
|
||||
CV_EXPORTS void mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags,
|
||||
float scale, bool conjB=false);
|
||||
|
||||
//! performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix
|
||||
//!
|
||||
//! If the source matrix is not continous, then additional copy will be done,
|
||||
//! so to avoid copying ensure the source matrix is continous one.
|
||||
//!
|
||||
//! Being implemented via CUFFT real-to-complex transform result contains only non-redundant values
|
||||
//! in CUFFT's format. Result as full complex matrix for such kind of transform cannot be retrieved.
|
||||
//!
|
||||
//! For complex-to-real transform it is assumed that the source matrix is packed in CUFFT's format, which
|
||||
//! doesn't allow us to retrieve parity of the destiantion matrix dimension (along which the first step
|
||||
//! of DFT is performed). You must specifiy odd case explicitely.
|
||||
CV_EXPORTS void dft(const GpuMat& src, GpuMat& dst, int flags=0, int nonZeroRows=0, bool odd=false);
|
||||
|
||||
//! computes convolution (or cross-correlation) of two images using discrete Fourier transform
|
||||
//! supports source images of 32FC1 type only
|
||||
//! result matrix will have 32FC1 type
|
||||
|
@ -76,6 +76,7 @@ void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, int, int, double, int) { thro
|
||||
void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, int, int, int) { throw_nogpu(); }
|
||||
void cv::gpu::mulSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, bool) { throw_nogpu(); }
|
||||
void cv::gpu::mulAndScaleSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, float, bool) { throw_nogpu(); }
|
||||
void cv::gpu::dft(const GpuMat&, GpuMat&, int, int, bool) { throw_nogpu(); }
|
||||
void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_nogpu(); }
|
||||
|
||||
|
||||
@ -1126,6 +1127,164 @@ void cv::gpu::mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c,
|
||||
caller(a, b, scale, c);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// dft
|
||||
|
||||
void cv::gpu::dft(const GpuMat& src, GpuMat& dst, int flags, int nonZeroRows, bool odd)
|
||||
{
|
||||
CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2);
|
||||
|
||||
// We don't support unpacked output (in the case of real input)
|
||||
CV_Assert(!(flags & DFT_COMPLEX_OUTPUT));
|
||||
|
||||
bool is_1d_input = (src.rows == 1) || (src.cols == 1);
|
||||
int is_row_dft = flags & DFT_ROWS;
|
||||
int is_scaled_dft = flags & DFT_SCALE;
|
||||
int is_inverse = flags & DFT_INVERSE;
|
||||
bool is_complex_input = src.channels() == 2;
|
||||
bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
|
||||
|
||||
// We don't support scaled transform
|
||||
CV_Assert(!is_scaled_dft);
|
||||
|
||||
// We don't support real-to-real transform
|
||||
CV_Assert(is_complex_input || is_complex_output);
|
||||
|
||||
GpuMat src_data, src_aux;
|
||||
|
||||
// Make sure here we work with the continuous input,
|
||||
// as CUFFT can't handle gaps
|
||||
if (src.isContinuous())
|
||||
src_aux = src;
|
||||
else
|
||||
{
|
||||
src_data = GpuMat(1, src.size().area(), src.type());
|
||||
src_aux = GpuMat(src.rows, src.cols, src.type(), src_data.ptr(), src.cols * src.elemSize());
|
||||
src.copyTo(src_aux);
|
||||
|
||||
if (is_1d_input && !is_row_dft)
|
||||
{
|
||||
// If the source matrix is the single column
|
||||
// reshape it into single row
|
||||
int rows = std::min(src.rows, src.cols);
|
||||
int cols = src.size().area() / rows;
|
||||
src_aux = GpuMat(rows, cols, src.type(), src_data.ptr(), cols * src.elemSize());
|
||||
}
|
||||
}
|
||||
|
||||
cufftType dft_type = CUFFT_R2C;
|
||||
if (is_complex_input)
|
||||
dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R;
|
||||
|
||||
int dft_cols = src_aux.cols;
|
||||
if (is_complex_input && !is_complex_output)
|
||||
dft_cols = (src_aux.cols - 1) * 2 + (int)odd;
|
||||
CV_Assert(dft_cols > 1);
|
||||
|
||||
cufftHandle plan;
|
||||
if (is_1d_input || is_row_dft)
|
||||
cufftPlan1d(&plan, dft_cols, dft_type, src_aux.rows);
|
||||
else
|
||||
cufftPlan2d(&plan, src_aux.rows, dft_cols, dft_type);
|
||||
|
||||
GpuMat dst_data, dst_aux;
|
||||
int dst_cols, dst_rows;
|
||||
bool is_dst_mem_good;
|
||||
|
||||
if (is_complex_input)
|
||||
{
|
||||
if (is_complex_output)
|
||||
{
|
||||
is_dst_mem_good = dst.isContinuous() && dst.type() == CV_32FC2
|
||||
&& dst.size().area() >= src.size().area();
|
||||
|
||||
if (is_dst_mem_good)
|
||||
dst_data = dst;
|
||||
else
|
||||
{
|
||||
dst_data.create(1, src.size().area(), CV_32FC2);
|
||||
dst_aux = GpuMat(src.rows, src.cols, dst_data.type(), dst_data.ptr(),
|
||||
src.cols * dst_data.elemSize());
|
||||
}
|
||||
|
||||
cufftSafeCall(cufftExecC2C(
|
||||
plan, src_data.ptr<cufftComplex>(),
|
||||
dst_data.ptr<cufftComplex>(),
|
||||
is_inverse ? CUFFT_INVERSE : CUFFT_FORWARD));
|
||||
|
||||
if (!is_dst_mem_good)
|
||||
{
|
||||
dst.create(dst_aux.size(), dst_aux.type());
|
||||
dst_aux.copyTo(dst);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
dst_rows = src.rows;
|
||||
dst_cols = (src.cols - 1) * 2 + (int)odd;
|
||||
if (src_aux.size() != src.size())
|
||||
{
|
||||
dst_rows = (src.rows - 1) * 2 + (int)odd;
|
||||
dst_cols = src.cols;
|
||||
}
|
||||
|
||||
is_dst_mem_good = dst.isContinuous() && dst.type() == CV_32F
|
||||
&& dst.rows >= dst_rows && dst.cols >= dst_cols;
|
||||
|
||||
if (is_dst_mem_good)
|
||||
dst_data = dst;
|
||||
else
|
||||
{
|
||||
dst_data.create(1, dst_rows * dst_cols, CV_32F);
|
||||
dst_aux = GpuMat(dst_rows, dst_cols, dst_data.type(), dst_data.ptr(),
|
||||
dst_cols * dst_data.elemSize());
|
||||
}
|
||||
|
||||
cufftSafeCall(cufftExecC2R(
|
||||
plan, src_data.ptr<cufftComplex>(), dst_data.ptr<cufftReal>()));
|
||||
|
||||
if (!is_dst_mem_good)
|
||||
{
|
||||
dst.create(dst_aux.size(), dst_aux.type());
|
||||
dst_aux.copyTo(dst);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
dst_rows = src.rows;
|
||||
dst_cols = src.cols / 2 + 1;
|
||||
if (src_aux.size() != src.size())
|
||||
{
|
||||
dst_rows = src.rows / 2 + 1;
|
||||
dst_cols = src.cols;
|
||||
}
|
||||
|
||||
is_dst_mem_good = dst.isContinuous() && dst.type() == CV_32FC2
|
||||
&& dst.rows >= dst_rows && dst.cols >= dst_cols;
|
||||
|
||||
if (is_dst_mem_good)
|
||||
dst_data = dst;
|
||||
else
|
||||
{
|
||||
dst_data.create(1, dst_rows * dst_cols, CV_32FC2);
|
||||
dst_aux = GpuMat(dst_rows, dst_cols, dst_data.type(), dst_data.ptr(),
|
||||
dst_cols * dst_data.elemSize());
|
||||
}
|
||||
|
||||
cufftSafeCall(cufftExecR2C(
|
||||
plan, src_data.ptr<cufftReal>(), dst_data.ptr<cufftComplex>()));
|
||||
|
||||
if (!is_dst_mem_good)
|
||||
{
|
||||
dst.create(dst_aux.size(), dst_aux.type());
|
||||
dst_aux.copyTo(dst);
|
||||
}
|
||||
}
|
||||
|
||||
cufftSafeCall(cufftDestroy(plan));
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// crossCorr
|
||||
|
||||
|
@ -53,13 +53,18 @@ struct CV_GpuMulSpectrumsTest: CvTest
|
||||
{
|
||||
try
|
||||
{
|
||||
if (!test(1 + rand() % 100, 1 + rand() % 1000)) return;
|
||||
if (!testConj(1 + rand() % 100, 1 + rand() % 1000)) return;
|
||||
if (!testScaled(1 + rand() % 100, 1 + rand() % 1000)) return;
|
||||
if (!testScaledConj(1 + rand() % 100, 1 + rand() % 1000)) return;
|
||||
test(0);
|
||||
testConj(0);
|
||||
testScaled(0);
|
||||
testScaledConj(0);
|
||||
test(DFT_ROWS);
|
||||
testConj(DFT_ROWS);
|
||||
testScaled(DFT_ROWS);
|
||||
testScaledConj(DFT_ROWS);
|
||||
}
|
||||
catch (const Exception& e)
|
||||
{
|
||||
ts->printf(CvTS::CONSOLE, e.what());
|
||||
if (!check_and_treat_gpu_exception(e, ts)) throw;
|
||||
return;
|
||||
}
|
||||
@ -134,69 +139,225 @@ struct CV_GpuMulSpectrumsTest: CvTest
|
||||
return true;
|
||||
}
|
||||
|
||||
bool test(int cols, int rows)
|
||||
void test(int flags)
|
||||
{
|
||||
int cols = 1 + rand() % 100, rows = 1 + rand() % 1000;
|
||||
|
||||
Mat a, b;
|
||||
gen(cols, rows, a);
|
||||
gen(cols, rows, b);
|
||||
|
||||
Mat c_gold;
|
||||
mulSpectrums(a, b, c_gold, 0, false);
|
||||
mulSpectrums(a, b, c_gold, flags, false);
|
||||
|
||||
GpuMat d_c;
|
||||
mulSpectrums(GpuMat(a), GpuMat(b), d_c, 0, false);
|
||||
mulSpectrums(GpuMat(a), GpuMat(b), d_c, flags, false);
|
||||
|
||||
return cmp(c_gold, Mat(d_c))
|
||||
|| (ts->printf(CvTS::CONSOLE, "test failed: cols=%d, rows=%d\n", cols, rows), false);
|
||||
if (!cmp(c_gold, Mat(d_c)))
|
||||
ts->printf(CvTS::CONSOLE, "test failed: cols=%d, rows=%d, flags=%d\n", cols, rows, flags);
|
||||
}
|
||||
|
||||
bool testConj(int cols, int rows)
|
||||
void testConj(int flags)
|
||||
{
|
||||
int cols = 1 + rand() % 100, rows = 1 + rand() % 1000;
|
||||
|
||||
Mat a, b;
|
||||
gen(cols, rows, a);
|
||||
gen(cols, rows, b);
|
||||
|
||||
Mat c_gold;
|
||||
mulSpectrums(a, b, c_gold, 0, true);
|
||||
mulSpectrums(a, b, c_gold, flags, true);
|
||||
|
||||
GpuMat d_c;
|
||||
mulSpectrums(GpuMat(a), GpuMat(b), d_c, 0, true);
|
||||
mulSpectrums(GpuMat(a), GpuMat(b), d_c, flags, true);
|
||||
|
||||
return cmp(c_gold, Mat(d_c))
|
||||
|| (ts->printf(CvTS::CONSOLE, "testConj failed: cols=%d, rows=%d\n", cols, rows), false);
|
||||
if (!cmp(c_gold, Mat(d_c)))
|
||||
ts->printf(CvTS::CONSOLE, "testConj failed: cols=%d, rows=%d, flags=%d\n", cols, rows, flags);
|
||||
}
|
||||
|
||||
bool testScaled(int cols, int rows)
|
||||
void testScaled(int flags)
|
||||
{
|
||||
int cols = 1 + rand() % 100, rows = 1 + rand() % 1000;
|
||||
|
||||
Mat a, b;
|
||||
gen(cols, rows, a);
|
||||
gen(cols, rows, b);
|
||||
float scale = 1.f / a.size().area();
|
||||
|
||||
Mat c_gold;
|
||||
mulSpectrums(a, b, c_gold, 0, false);
|
||||
mulSpectrums(a, b, c_gold, flags, false);
|
||||
|
||||
GpuMat d_c;
|
||||
mulAndScaleSpectrums(GpuMat(a), GpuMat(b), d_c, 0, scale, false);
|
||||
mulAndScaleSpectrums(GpuMat(a), GpuMat(b), d_c, flags, scale, false);
|
||||
|
||||
return cmpScaled(c_gold, Mat(d_c), scale)
|
||||
|| (ts->printf(CvTS::CONSOLE, "testScaled failed: cols=%d, rows=%d\n", cols, rows), false);
|
||||
if (!cmpScaled(c_gold, Mat(d_c), scale))
|
||||
ts->printf(CvTS::CONSOLE, "testScaled failed: cols=%d, rows=%d, flags=%d\n", cols, rows, flags);
|
||||
}
|
||||
|
||||
bool testScaledConj(int cols, int rows)
|
||||
void testScaledConj(int flags)
|
||||
{
|
||||
int cols = 1 + rand() % 100, rows = 1 + rand() % 1000;
|
||||
|
||||
Mat a, b;
|
||||
gen(cols, rows, a);
|
||||
gen(cols, rows, b);
|
||||
float scale = 1.f / a.size().area();
|
||||
|
||||
Mat c_gold;
|
||||
mulSpectrums(a, b, c_gold, 0, true);
|
||||
mulSpectrums(a, b, c_gold, flags, true);
|
||||
|
||||
GpuMat d_c;
|
||||
mulAndScaleSpectrums(GpuMat(a), GpuMat(b), d_c, 0, scale, true);
|
||||
mulAndScaleSpectrums(GpuMat(a), GpuMat(b), d_c, flags, scale, true);
|
||||
|
||||
return cmpScaled(c_gold, Mat(d_c), scale)
|
||||
|| (ts->printf(CvTS::CONSOLE, "testScaledConj failed: cols=%d, rows=%d\n", cols, rows), false);
|
||||
if (!cmpScaled(c_gold, Mat(d_c), scale))
|
||||
ts->printf(CvTS::CONSOLE, "testScaledConj failed: cols=%d, rows=%d, flags=%D\n", cols, rows, flags);
|
||||
}
|
||||
} CV_GpuMulSpectrumsTest_inst;
|
||||
} CV_GpuMulSpectrumsTest_inst;
|
||||
|
||||
|
||||
struct CV_GpuDftTest: CvTest
|
||||
{
|
||||
CV_GpuDftTest(): CvTest("GPU-DftTest", "dft") {}
|
||||
|
||||
void run(int)
|
||||
{
|
||||
try
|
||||
{
|
||||
int cols = 1 + rand() % 100, rows = 1 + rand() % 100;
|
||||
|
||||
testC2C(cols, rows, 0, "no flags");
|
||||
testC2C(cols, rows + 1, 0, "no flags 0 1");
|
||||
testC2C(cols, rows + 1, 0, "no flags 1 0");
|
||||
testC2C(cols + 1, rows, 0, "no flags 1 1");
|
||||
testC2C(cols, rows, DFT_INVERSE, "DFT_INVERSE");
|
||||
testC2C(cols, rows, DFT_ROWS, "DFT_ROWS");
|
||||
testC2C(1, rows, 0, "single col");
|
||||
testC2C(cols, 1, 0, "single row");
|
||||
testC2C(1, rows, DFT_INVERSE, "single col inversed");
|
||||
testC2C(cols, 1, DFT_INVERSE, "single row inversed");
|
||||
testC2C(cols, 1, DFT_ROWS, "single row DFT_ROWS");
|
||||
testC2C(1, 2, 0, "size 1 2");
|
||||
testC2C(2, 1, 0, "size 2 1");
|
||||
|
||||
testR2CThenC2R(cols, rows, "sanity");
|
||||
testR2CThenC2R(cols, rows + 1, "sanity 0 1");
|
||||
testR2CThenC2R(cols + 1, rows, "sanity 1 0");
|
||||
testR2CThenC2R(cols + 1, rows + 1, "sanity 1 1");
|
||||
testR2CThenC2R(1, rows, "single col");
|
||||
testR2CThenC2R(1, rows + 1, "single col 1");
|
||||
testR2CThenC2R(cols, 1, "single row" );;
|
||||
testR2CThenC2R(cols + 1, 1, "single row 1" );;
|
||||
}
|
||||
catch (const Exception& e)
|
||||
{
|
||||
ts->printf(CvTS::CONSOLE, e.what());
|
||||
if (!check_and_treat_gpu_exception(e, ts)) throw;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void gen(int cols, int rows, int cn, Mat& mat)
|
||||
{
|
||||
RNG rng;
|
||||
mat.create(rows, cols, CV_MAKETYPE(CV_32F, cn));
|
||||
rng.fill(mat, RNG::UNIFORM, Scalar::all(0.f), Scalar::all(10.f));
|
||||
}
|
||||
|
||||
bool cmp(const Mat& gold, const Mat& mine, float max_err=1e-3f, float scale=1.f)
|
||||
{
|
||||
if (gold.size() != mine.size())
|
||||
{
|
||||
ts->printf(CvTS::CONSOLE, "bad sizes: gold: %d %d, mine: %d %d\n", gold.cols, gold.rows, mine.cols, mine.rows);
|
||||
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
|
||||
return false;
|
||||
}
|
||||
if (gold.depth() != mine.depth())
|
||||
{
|
||||
ts->printf(CvTS::CONSOLE, "bad depth: gold=%d, mine=%d\n", gold.depth(), mine.depth());
|
||||
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
|
||||
return false;
|
||||
}
|
||||
if (gold.channels() != mine.channels())
|
||||
{
|
||||
ts->printf(CvTS::CONSOLE, "bad channel count: gold=%d, mine=%d\n", gold.channels(), mine.channels());
|
||||
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < gold.rows; ++i)
|
||||
{
|
||||
for (int j = 0; j < gold.cols * gold.channels(); ++j)
|
||||
{
|
||||
float gold_ = gold.at<float>(i, j);
|
||||
float mine_ = mine.at<float>(i, j) * scale;
|
||||
if (fabs(gold_ - mine_) > max_err)
|
||||
{
|
||||
ts->printf(CvTS::CONSOLE, "bad values at %d %d: gold=%f, mine=%f\n", j / gold.channels(), i, gold_, mine_);
|
||||
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void testC2C(int cols, int rows, int flags, const std::string& hint)
|
||||
{
|
||||
Mat a;
|
||||
gen(cols, rows, 2, a);
|
||||
|
||||
Mat b_gold;
|
||||
dft(a, b_gold, flags);
|
||||
|
||||
GpuMat d_b;
|
||||
dft(GpuMat(a), d_b, flags);
|
||||
|
||||
bool ok = true;
|
||||
if (ok && d_b.depth() != CV_32F)
|
||||
{
|
||||
ts->printf(CvTS::CONSOLE, "bad depth: %d\n", d_b.depth());
|
||||
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
|
||||
ok = false;
|
||||
}
|
||||
if (ok && d_b.channels() != 2)
|
||||
{
|
||||
ts->printf(CvTS::CONSOLE, "bad channel count: %d\n", d_b.channels());
|
||||
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
|
||||
ok = false;
|
||||
}
|
||||
if (ok) ok = cmp(b_gold, Mat(d_b), rows * cols * 1e-5f);
|
||||
if (!ok)
|
||||
ts->printf(CvTS::CONSOLE, "testC2C failed: hint=%s, cols=%d, rows=%d, flags=%d\n", hint.c_str(), cols, rows, flags);
|
||||
}
|
||||
|
||||
void testR2CThenC2R(int cols, int rows, const std::string& hint)
|
||||
{
|
||||
Mat a;
|
||||
gen(cols, rows, 1, a);
|
||||
|
||||
bool odd = false;
|
||||
if (a.cols == 1) odd = a.rows % 2 == 1;
|
||||
else odd = a.cols % 2 == 1;
|
||||
bool ok = true;
|
||||
|
||||
GpuMat d_b;
|
||||
GpuMat d_c;
|
||||
dft(GpuMat(a), d_b, 0);
|
||||
dft(d_b, d_c, DFT_REAL_OUTPUT, 0, odd);
|
||||
|
||||
if (ok && d_c.depth() != CV_32F)
|
||||
{
|
||||
ts->printf(CvTS::CONSOLE, "bad depth: %d\n", d_c.depth());
|
||||
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
|
||||
ok = false;
|
||||
}
|
||||
if (ok && d_c.channels() != 1)
|
||||
{
|
||||
ts->printf(CvTS::CONSOLE, "bad channel count: %d\n", d_c.channels());
|
||||
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
|
||||
ok = false;
|
||||
}
|
||||
if (ok) ok = cmp(a, Mat(d_c), rows * cols * 1e-5f, 1.f / (rows * cols));
|
||||
if (!ok)
|
||||
ts->printf(CvTS::CONSOLE, "testR2CThenC2R failed: hint=%s, cols=%d, rows=%d\n", hint.c_str(), cols, rows);
|
||||
}
|
||||
} CV_GpuDftTest_inst;
|
Loading…
x
Reference in New Issue
Block a user