diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index 69ec2c9ef..449e19db4 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -2151,27 +2151,34 @@ struct OCL_FftPlan size_t localsize[2]; String kernel_name; + bool is1d = (flags & DFT_ROWS) != 0 || dft_size == 1; + String options = buildOptions; + if (rows) { globalsize[0] = thread_count; globalsize[1] = dft_size; localsize[0] = thread_count; localsize[1] = 1; kernel_name = "fft_multi_radix_rows"; + if (is1d && (flags & DFT_SCALE)) + options += " -D DFT_SCALE"; } else { globalsize[0] = dft_size; globalsize[1] = thread_count; localsize[0] = 1; localsize[1] = thread_count; kernel_name = "fft_multi_radix_cols"; + if (flags & DFT_SCALE) + options += " -D DFT_SCALE"; } - - bool is1d = (flags & DFT_ROWS) != 0 || dft_size == 1; - String options = buildOptions; + if (src.channels() == 1) options += " -D REAL_INPUT"; if (dst.channels() == 1) options += " -D CCS_OUTPUT"; if ((is1d && src.channels() == 1) || (rows && (flags & DFT_REAL_OUTPUT))) options += " -D NO_CONJUGATE"; + if (is1d) + options += " -D IS_1D"; ocl::Kernel k(kernel_name.c_str(), ocl::core::fft_oclsrc, options); if (k.empty()) diff --git a/modules/core/src/opencl/fft.cl b/modules/core/src/opencl/fft.cl index a778d59f2..d59e0d9b4 100644 --- a/modules/core/src/opencl/fft.cl +++ b/modules/core/src/opencl/fft.cl @@ -301,6 +301,12 @@ void fft_radix5(__local float2* smem, __constant const float2* twiddles, const i barrier(CLK_LOCAL_MEM_FENCE); } +#ifdef DFT_SCALE +#define VAL(x, scale) x*scale +#else +#define VAL(x, scale) x +#endif + __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step, int src_offset, int src_rows, int src_cols, __global uchar* dst_ptr, int dst_step, int dst_offset, int dst_rows, int dst_cols, __constant float2 * twiddles_ptr, const int t, const int nz) @@ -314,6 +320,11 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step, __constant const float2* twiddles = (__constant float2*) twiddles_ptr; const int ind = x; const int block_size = LOCAL_SIZE/kercn; +#ifdef IS_1D + float scale = 1.f/dst_cols; +#else + float scale = 1.f/(dst_cols*dst_rows); +#endif #ifndef REAL_INPUT __global const float2* src = (__global const float2*)(src_ptr + mad24(y, src_step, mad24(x, (int)(sizeof(float)*2), src_offset))); @@ -341,15 +352,15 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step, __global float2* dst = (__global float2*)(dst_ptr + mad24(y, dst_step, dst_offset)); #pragma unroll for (int i=x; i -#include -#include using namespace cv; using namespace std; @@ -26,31 +24,6 @@ const char* keys = int main(int argc, const char ** argv) { - //int cols = 4; - //int rows = 768; - //srand(0); - //Mat input(Size(cols, rows), CV_32FC2); - //for (int i=0; i(j,i) = Vec2f((float) rand() / RAND_MAX, (float) rand() / RAND_MAX); - //Mat dst; - // - //UMat gpu_input, gpu_dst; - //input.copyTo(gpu_input); - //auto start = std::chrono::system_clock::now(); - //dft(input, dst, DFT_ROWS); - //auto cpu_duration = chrono::duration_cast(chrono::system_clock::now() - start); - // - //start = std::chrono::system_clock::now(); - //dft(gpu_input, gpu_dst, DFT_ROWS); - //auto gpu_duration = chrono::duration_cast(chrono::system_clock::now() - start); - - //double n = norm(dst, gpu_dst); - //cout << "norm = " << n << endl; - //cout << "CPU time: " << cpu_duration.count() << "ms" << endl; - //cout << "GPU time: " << gpu_duration.count() << "ms" << endl; - - help(); CommandLineParser parser(argc, argv, keys); string filename = parser.get(0); @@ -62,46 +35,16 @@ int main(int argc, const char ** argv) printf("Cannot read image file: %s\n", filename.c_str()); return -1; } - - Mat small_img = img(Rect(0,0,6,6)); - - int M = getOptimalDFTSize( small_img.rows ); - int N = getOptimalDFTSize( small_img.cols ); + int M = getOptimalDFTSize( img.rows ); + int N = getOptimalDFTSize( img.cols ); Mat padded; - copyMakeBorder(small_img, padded, 0, M - small_img.rows, 0, N - small_img.cols, BORDER_CONSTANT, Scalar::all(0)); + copyMakeBorder(img, padded, 0, M - img.rows, 0, N - img.cols, BORDER_CONSTANT, Scalar::all(0)); - Mat planes[] = {Mat_(padded), Mat::ones(padded.size(), CV_32F)}; - Mat complexImg, complexImg1, complexInput; + Mat planes[] = {Mat_(padded), Mat::zeros(padded.size(), CV_32F)}; + Mat complexImg; merge(planes, 2, complexImg); - Mat realInput; - padded.convertTo(realInput, CV_32F); - complexInput = complexImg; - //cout << complexImg << endl; - //dft(complexImg, complexImg, DFT_REAL_OUTPUT); - //cout << "Complex to Complex" << endl; - //cout << complexImg << endl; - cout << "Complex input" << endl << complexInput << endl; - cout << "Real input" << endl << realInput << endl; - - dft(complexInput, complexImg1, DFT_COMPLEX_OUTPUT); - cout << "Complex to Complex image: " << endl; - cout << endl << complexImg1 << endl; - - Mat realImg1; - dft(complexInput, realImg1, DFT_REAL_OUTPUT); - cout << "Complex to Real image: " << endl; - cout << endl << realImg1 << endl; - - Mat realOut; - dft(complexImg1, realOut, DFT_INVERSE | DFT_COMPLEX_OUTPUT); - cout << "Complex to Complex (inverse):" << endl; - cout << realOut << endl; - - Mat complexOut; - dft(realImg1, complexOut, DFT_INVERSE | DFT_REAL_OUTPUT | DFT_SCALE); - cout << "Complex to Real (inverse):" << endl; - cout << complexOut << endl; + dft(complexImg, complexImg); // compute log(1 + sqrt(Re(DFT(img))**2 + Im(DFT(img))**2)) split(complexImg, planes);