diff --git a/modules/core/perf/opencl/perf_dxt.cpp b/modules/core/perf/opencl/perf_dxt.cpp index edeeda7f0..3980a191f 100644 --- a/modules/core/perf/opencl/perf_dxt.cpp +++ b/modules/core/perf/opencl/perf_dxt.cpp @@ -67,7 +67,7 @@ typedef TestBaseWithParam DftFixture; OCL_PERF_TEST_P(DftFixture, Dft, ::testing::Combine(Values(C2C, R2R, C2R, R2C), Values(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3, Size(1024, 1024), Size(512, 512), Size(2048, 2048)), - Values((int)DFT_ROWS, (int) 0/*, (int)DFT_SCALE, (int)DFT_INVERSE, + Values((int)DFT_ROWS, (int) 0, (int)DFT_SCALE/*, (int)DFT_INVERSE, (int)DFT_INVERSE | DFT_SCALE, (int)DFT_ROWS | DFT_INVERSE*/))) { const DftParams params = GetParam(); diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index 449e19db4..879a70613 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -2129,8 +2129,8 @@ struct OCL_FftPlan for (int k=0; k<(n/radix); k++) { - ptr[ptr_index++] = cos(k*theta); - ptr[ptr_index++] = sin(k*theta); + ptr[ptr_index++] = (float) cos(k*theta); + ptr[ptr_index++] = (float) sin(k*theta); } } } @@ -2152,13 +2152,14 @@ struct OCL_FftPlan String kernel_name; bool is1d = (flags & DFT_ROWS) != 0 || dft_size == 1; + bool inv = (flags & DFT_INVERSE) != 0; String options = buildOptions; if (rows) { globalsize[0] = thread_count; globalsize[1] = dft_size; localsize[0] = thread_count; localsize[1] = 1; - kernel_name = "fft_multi_radix_rows"; + kernel_name = !inv ? "fft_multi_radix_rows" : "ifft_multi_radix_rows"; if (is1d && (flags & DFT_SCALE)) options += " -D DFT_SCALE"; } @@ -2166,7 +2167,7 @@ struct OCL_FftPlan { globalsize[0] = dft_size; globalsize[1] = thread_count; localsize[0] = 1; localsize[1] = thread_count; - kernel_name = "fft_multi_radix_cols"; + kernel_name = !inv ? "fft_multi_radix_cols" : "ifft_multi_radix_cols"; if (flags & DFT_SCALE) options += " -D DFT_SCALE"; } @@ -2270,13 +2271,10 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro // if output format is not specified if (complex_output + real_output == 0) { - if (!inv) - { - if (real_input) - real_output = 1; - else - complex_output = 1; - } + if (real_input) + real_output = 1; + else + complex_output = 1; } // Forward Complex to CCS not supported @@ -2294,23 +2292,7 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro real_output = 1; } - UMat input, output; - if (complex_input) - { - input = src; - } - else - { - if (!inv) - { - input = src; - } - else - { - // TODO: unpack from CCS format - } - } - + UMat output; if (complex_output) { _dst.create(src.size(), CV_32FC2); @@ -2330,7 +2312,7 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro } } - if (!ocl_dft_C2C_rows(input, output, nonzero_rows, flags)) + if (!ocl_dft_C2C_rows(src, output, nonzero_rows, flags)) return false; if (!is1d) diff --git a/modules/core/src/opencl/fft.cl b/modules/core/src/opencl/fft.cl index d59e0d9b4..8aecfc056 100644 --- a/modules/core/src/opencl/fft.cl +++ b/modules/core/src/opencl/fft.cl @@ -424,4 +424,117 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step, } #endif } +} + +__kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step, int src_offset, int src_rows, int src_cols, + __global uchar* dst_ptr, int dst_step, int dst_offset, int dst_rows, int dst_cols, + __constant float2 * twiddles_ptr, const int t, const int nz) +{ + const int x = get_global_id(0); + const int y = get_group_id(1); + + if (y < nz) + { + __local float2 smem[LOCAL_SIZE]; + __constant const float2* twiddles = (__constant float2*) twiddles_ptr; + const int ind = x; + const int block_size = LOCAL_SIZE/kercn; +#ifdef IS_1D + float scale = 1.f/dst_cols; +#else + float scale = 1.f/(dst_cols*dst_rows); +#endif + +#ifndef REAL_INPUT + __global const float2* src = (__global const float2*)(src_ptr + mad24(y, src_step, mad24(x, (int)(sizeof(float)*2), src_offset))); + #pragma unroll + for (int i=0; i