Added nonzero_rows support
This commit is contained in:
parent
52f76a3283
commit
1d2cf0e20e
@ -2034,19 +2034,19 @@ enum FftType
|
|||||||
C2C = 3
|
C2C = 3
|
||||||
};
|
};
|
||||||
|
|
||||||
static std::vector<int> ocl_getRadixes(int cols, std::vector<int>& radixes, std::vector<int>& blocks, int& min_radix)
|
static void ocl_getRadixes(int cols, std::vector<int>& radixes, std::vector<int>& blocks, int& min_radix)
|
||||||
{
|
{
|
||||||
int factors[34];
|
int factors[34];
|
||||||
int nf = DFTFactorize( cols, factors );
|
int nf = DFTFactorize(cols, factors);
|
||||||
|
|
||||||
int n = 1;
|
int n = 1;
|
||||||
int factor_index = 0;
|
int factor_index = 0;
|
||||||
min_radix = INT_MAX;
|
min_radix = INT_MAX;
|
||||||
|
|
||||||
// 2^n transforms
|
// 2^n transforms
|
||||||
if ( (factors[factor_index] & 1) == 0 )
|
if ((factors[factor_index] & 1) == 0)
|
||||||
{
|
{
|
||||||
for( ; n < factors[factor_index]; )
|
for( ; n < factors[factor_index];)
|
||||||
{
|
{
|
||||||
int radix = 2, block = 1;
|
int radix = 2, block = 1;
|
||||||
if (8*n <= factors[0])
|
if (8*n <= factors[0])
|
||||||
@ -2080,7 +2080,7 @@ static std::vector<int> ocl_getRadixes(int cols, std::vector<int>& radixes, std:
|
|||||||
}
|
}
|
||||||
|
|
||||||
// all the other transforms
|
// all the other transforms
|
||||||
for( ; factor_index < nf; factor_index++ )
|
for( ; factor_index < nf; factor_index++)
|
||||||
{
|
{
|
||||||
int radix = factors[factor_index], block = 1;
|
int radix = factors[factor_index], block = 1;
|
||||||
if (radix == 3)
|
if (radix == 3)
|
||||||
@ -2101,7 +2101,6 @@ static std::vector<int> ocl_getRadixes(int cols, std::vector<int>& radixes, std:
|
|||||||
blocks.push_back(block);
|
blocks.push_back(block);
|
||||||
min_radix = min(min_radix, block*radix);
|
min_radix = min(min_radix, block*radix);
|
||||||
}
|
}
|
||||||
return radixes;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct OCL_FftPlan
|
struct OCL_FftPlan
|
||||||
@ -2111,14 +2110,13 @@ struct OCL_FftPlan
|
|||||||
int thread_count;
|
int thread_count;
|
||||||
|
|
||||||
int dft_size;
|
int dft_size;
|
||||||
int flags;
|
|
||||||
bool status;
|
bool status;
|
||||||
OCL_FftPlan(int _size, int _flags): dft_size(_size), flags(_flags), status(true)
|
OCL_FftPlan(int _size): dft_size(_size), status(true)
|
||||||
{
|
{
|
||||||
int min_radix;
|
int min_radix;
|
||||||
std::vector<int> radixes, blocks;
|
std::vector<int> radixes, blocks;
|
||||||
ocl_getRadixes(dft_size, radixes, blocks, min_radix);
|
ocl_getRadixes(dft_size, radixes, blocks, min_radix);
|
||||||
thread_count = (dft_size + min_radix-1) / min_radix;
|
thread_count = dft_size / min_radix;
|
||||||
|
|
||||||
if (thread_count > ocl::Device::getDefault().maxWorkGroupSize())
|
if (thread_count > ocl::Device::getDefault().maxWorkGroupSize())
|
||||||
{
|
{
|
||||||
@ -2140,8 +2138,7 @@ struct OCL_FftPlan
|
|||||||
n *= radix;
|
n *= radix;
|
||||||
}
|
}
|
||||||
|
|
||||||
twiddles.create(1, twiddle_size, CV_32FC2);
|
Mat tw(1, twiddle_size, CV_32FC2);
|
||||||
Mat tw = twiddles.getMat(ACCESS_WRITE);
|
|
||||||
float* ptr = tw.ptr<float>();
|
float* ptr = tw.ptr<float>();
|
||||||
int ptr_index = 0;
|
int ptr_index = 0;
|
||||||
|
|
||||||
@ -2162,6 +2159,7 @@ struct OCL_FftPlan
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
twiddles = tw.getUMat(ACCESS_READ);
|
||||||
|
|
||||||
buildOptions = format("-D LOCAL_SIZE=%d -D kercn=%d -D RADIX_PROCESS=%s",
|
buildOptions = format("-D LOCAL_SIZE=%d -D kercn=%d -D RADIX_PROCESS=%s",
|
||||||
dft_size, dft_size/thread_count, radix_processing.c_str());
|
dft_size, dft_size/thread_count, radix_processing.c_str());
|
||||||
@ -2185,10 +2183,10 @@ struct OCL_FftPlan
|
|||||||
|
|
||||||
if (rows)
|
if (rows)
|
||||||
{
|
{
|
||||||
globalsize[0] = thread_count; globalsize[1] = dft_size;
|
globalsize[0] = thread_count; globalsize[1] = src.rows;
|
||||||
localsize[0] = thread_count; localsize[1] = 1;
|
localsize[0] = thread_count; localsize[1] = 1;
|
||||||
kernel_name = !inv ? "fft_multi_radix_rows" : "ifft_multi_radix_rows";
|
kernel_name = !inv ? "fft_multi_radix_rows" : "ifft_multi_radix_rows";
|
||||||
if (is1d && (flags & DFT_SCALE))
|
if ((is1d || inv) && (flags & DFT_SCALE))
|
||||||
options += " -D DFT_SCALE";
|
options += " -D DFT_SCALE";
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -2200,14 +2198,9 @@ struct OCL_FftPlan
|
|||||||
options += " -D DFT_SCALE";
|
options += " -D DFT_SCALE";
|
||||||
}
|
}
|
||||||
|
|
||||||
if (src.channels() == 1)
|
options += src.channels() == 1 ? " -D REAL_INPUT" : " -D COMPLEX_INPUT";
|
||||||
options += " -D REAL_INPUT";
|
options += dst.channels() == 1 ? " -D REAL_OUTPUT" : " -D COMPLEX_OUTPUT";
|
||||||
else
|
options += is1d ? " -D IS_1D" : "";
|
||||||
options += " -D COMPLEX_INPUT";
|
|
||||||
if (dst.channels() == 1)
|
|
||||||
options += " -D REAL_OUTPUT";
|
|
||||||
if (is1d)
|
|
||||||
options += " -D IS_1D";
|
|
||||||
|
|
||||||
if (!inv)
|
if (!inv)
|
||||||
{
|
{
|
||||||
@ -2216,10 +2209,10 @@ struct OCL_FftPlan
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (is1d && fftType == C2R || (rows && fftType == R2R))
|
if (rows && (fftType == C2R || fftType == R2R))
|
||||||
options += " -D NO_CONJUGATE";
|
options += " -D NO_CONJUGATE";
|
||||||
if (dst.cols % 2 == 0)
|
if (dst.cols % 2 == 0)
|
||||||
options += " -D EVEN";
|
options += " -D EVEN";
|
||||||
}
|
}
|
||||||
|
|
||||||
ocl::Kernel k(kernel_name.c_str(), ocl::core::fft_oclsrc, options);
|
ocl::Kernel k(kernel_name.c_str(), ocl::core::fft_oclsrc, options);
|
||||||
@ -2240,7 +2233,7 @@ public:
|
|||||||
return planCache;
|
return planCache;
|
||||||
}
|
}
|
||||||
|
|
||||||
OCL_FftPlan* getFftPlan(int dft_size, int flags)
|
OCL_FftPlan* getFftPlan(int dft_size)
|
||||||
{
|
{
|
||||||
for (size_t i = 0, size = planStorage.size(); i < size; ++i)
|
for (size_t i = 0, size = planStorage.size(); i < size; ++i)
|
||||||
{
|
{
|
||||||
@ -2252,7 +2245,7 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
OCL_FftPlan * newPlan = new OCL_FftPlan(dft_size, flags);
|
OCL_FftPlan * newPlan = new OCL_FftPlan(dft_size);
|
||||||
planStorage.push_back(newPlan);
|
planStorage.push_back(newPlan);
|
||||||
return newPlan;
|
return newPlan;
|
||||||
}
|
}
|
||||||
@ -2275,13 +2268,13 @@ protected:
|
|||||||
|
|
||||||
static bool ocl_dft_C2C_rows(InputArray _src, OutputArray _dst, int nonzero_rows, int flags, int fftType)
|
static bool ocl_dft_C2C_rows(InputArray _src, OutputArray _dst, int nonzero_rows, int flags, int fftType)
|
||||||
{
|
{
|
||||||
const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.cols(), flags);
|
const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.cols());
|
||||||
return plan->enqueueTransform(_src, _dst, nonzero_rows, flags, fftType, true);
|
return plan->enqueueTransform(_src, _dst, nonzero_rows, flags, fftType, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool ocl_dft_C2C_cols(InputArray _src, OutputArray _dst, int nonzero_cols, int flags, int fftType)
|
static bool ocl_dft_C2C_cols(InputArray _src, OutputArray _dst, int nonzero_cols, int flags, int fftType)
|
||||||
{
|
{
|
||||||
const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.rows(), flags);
|
const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.rows());
|
||||||
return plan->enqueueTransform(_src, _dst, nonzero_cols, flags, fftType, false);
|
return plan->enqueueTransform(_src, _dst, nonzero_cols, flags, fftType, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2385,7 +2378,7 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int nonzero_cols = src.cols/2 + 1;// : src.cols;
|
int nonzero_cols = src.cols/2 + 1;
|
||||||
if (!ocl_dft_C2C_cols(src, output, nonzero_cols, flags, fftType))
|
if (!ocl_dft_C2C_cols(src, output, nonzero_cols, flags, fftType))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
@ -3002,7 +3002,8 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
|
|||||||
sync ? 0 : &p->e);
|
sync ? 0 : &p->e);
|
||||||
if( sync || retval != CL_SUCCESS )
|
if( sync || retval != CL_SUCCESS )
|
||||||
{
|
{
|
||||||
CV_OclDbgAssert(clFinish(qq) == CL_SUCCESS);
|
int a = clFinish(qq);
|
||||||
|
CV_OclDbgAssert(a == CL_SUCCESS);
|
||||||
p->cleanupUMats();
|
p->cleanupUMats();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -3898,8 +3899,9 @@ public:
|
|||||||
if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() )
|
if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() )
|
||||||
{
|
{
|
||||||
AlignedDataPtr<false, true> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
|
AlignedDataPtr<false, true> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
|
||||||
CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
|
int a = clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
|
||||||
u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS );
|
u->size, alignedPtr.getAlignedPtr(), 0, 0, 0);
|
||||||
|
CV_Assert( a == CL_SUCCESS );
|
||||||
u->markHostCopyObsolete(false);
|
u->markHostCopyObsolete(false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -16,7 +16,7 @@ float2 twiddle(float2 a) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((always_inline))
|
__attribute__((always_inline))
|
||||||
void butterfly2(float2 a0, float2 a1, __local float2* smem, __constant const float2* twiddles,
|
void butterfly2(float2 a0, float2 a1, __local float2* smem, __global const float2* twiddles,
|
||||||
const int x, const int block_size)
|
const int x, const int block_size)
|
||||||
{
|
{
|
||||||
const int k = x & (block_size - 1);
|
const int k = x & (block_size - 1);
|
||||||
@ -28,7 +28,7 @@ void butterfly2(float2 a0, float2 a1, __local float2* smem, __constant const flo
|
|||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((always_inline))
|
__attribute__((always_inline))
|
||||||
void butterfly4(float2 a0, float2 a1, float2 a2, float2 a3, __local float2* smem, __constant const float2* twiddles,
|
void butterfly4(float2 a0, float2 a1, float2 a2, float2 a3, __local float2* smem, __global const float2* twiddles,
|
||||||
const int x, const int block_size)
|
const int x, const int block_size)
|
||||||
{
|
{
|
||||||
const int k = x & (block_size - 1);
|
const int k = x & (block_size - 1);
|
||||||
@ -50,10 +50,10 @@ void butterfly4(float2 a0, float2 a1, float2 a2, float2 a3, __local float2* smem
|
|||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((always_inline))
|
__attribute__((always_inline))
|
||||||
void butterfly3(float2 a0, float2 a1, float2 a2, __local float2* smem, __constant const float2* twiddles,
|
void butterfly3(float2 a0, float2 a1, float2 a2, __local float2* smem, __global const float2* twiddles,
|
||||||
const int x, const int block_size)
|
const int x, const int block_size)
|
||||||
{
|
{
|
||||||
const int k = x & (block_size - 1);
|
const int k = x % block_size;
|
||||||
a1 = mul_float2(twiddles[k], a1);
|
a1 = mul_float2(twiddles[k], a1);
|
||||||
a2 = mul_float2(twiddles[k+block_size], a2);
|
a2 = mul_float2(twiddles[k+block_size], a2);
|
||||||
const int dst_ind = ((x - k) * 3) + k;
|
const int dst_ind = ((x - k) * 3) + k;
|
||||||
@ -68,10 +68,10 @@ void butterfly3(float2 a0, float2 a1, float2 a2, __local float2* smem, __constan
|
|||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((always_inline))
|
__attribute__((always_inline))
|
||||||
void butterfly5(float2 a0, float2 a1, float2 a2, float2 a3, float2 a4, __local float2* smem, __constant const float2* twiddles,
|
void butterfly5(float2 a0, float2 a1, float2 a2, float2 a3, float2 a4, __local float2* smem, __global const float2* twiddles,
|
||||||
const int x, const int block_size)
|
const int x, const int block_size)
|
||||||
{
|
{
|
||||||
const int k = x & (block_size - 1);
|
const int k = x % block_size;
|
||||||
a1 = mul_float2(twiddles[k], a1);
|
a1 = mul_float2(twiddles[k], a1);
|
||||||
a2 = mul_float2(twiddles[k + block_size], a2);
|
a2 = mul_float2(twiddles[k + block_size], a2);
|
||||||
a3 = mul_float2(twiddles[k+2*block_size], a3);
|
a3 = mul_float2(twiddles[k+2*block_size], a3);
|
||||||
@ -109,7 +109,7 @@ void butterfly5(float2 a0, float2 a1, float2 a2, float2 a3, float2 a4, __local f
|
|||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((always_inline))
|
__attribute__((always_inline))
|
||||||
void fft_radix2(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t)
|
void fft_radix2(__local float2* smem, __global const float2* twiddles, const int x, const int block_size, const int t)
|
||||||
{
|
{
|
||||||
float2 a0, a1;
|
float2 a0, a1;
|
||||||
|
|
||||||
@ -128,7 +128,7 @@ void fft_radix2(__local float2* smem, __constant const float2* twiddles, const i
|
|||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((always_inline))
|
__attribute__((always_inline))
|
||||||
void fft_radix2_B2(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t)
|
void fft_radix2_B2(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t)
|
||||||
{
|
{
|
||||||
const int x2 = x1 + t/2;
|
const int x2 = x1 + t/2;
|
||||||
float2 a0, a1, a2, a3;
|
float2 a0, a1, a2, a3;
|
||||||
@ -151,7 +151,7 @@ void fft_radix2_B2(__local float2* smem, __constant const float2* twiddles, cons
|
|||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((always_inline))
|
__attribute__((always_inline))
|
||||||
void fft_radix2_B3(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t)
|
void fft_radix2_B3(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t)
|
||||||
{
|
{
|
||||||
const int x2 = x1 + t/3;
|
const int x2 = x1 + t/3;
|
||||||
const int x3 = x1 + 2*t/3;
|
const int x3 = x1 + 2*t/3;
|
||||||
@ -177,7 +177,7 @@ void fft_radix2_B3(__local float2* smem, __constant const float2* twiddles, cons
|
|||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((always_inline))
|
__attribute__((always_inline))
|
||||||
void fft_radix2_B4(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t)
|
void fft_radix2_B4(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t)
|
||||||
{
|
{
|
||||||
const int thread_block = t/4;
|
const int thread_block = t/4;
|
||||||
const int x2 = x1 + thread_block;
|
const int x2 = x1 + thread_block;
|
||||||
@ -207,7 +207,7 @@ void fft_radix2_B4(__local float2* smem, __constant const float2* twiddles, cons
|
|||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((always_inline))
|
__attribute__((always_inline))
|
||||||
void fft_radix2_B5(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t)
|
void fft_radix2_B5(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t)
|
||||||
{
|
{
|
||||||
const int thread_block = t/5;
|
const int thread_block = t/5;
|
||||||
const int x2 = x1 + thread_block;
|
const int x2 = x1 + thread_block;
|
||||||
@ -240,7 +240,7 @@ void fft_radix2_B5(__local float2* smem, __constant const float2* twiddles, cons
|
|||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((always_inline))
|
__attribute__((always_inline))
|
||||||
void fft_radix4(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t)
|
void fft_radix4(__local float2* smem, __global const float2* twiddles, const int x, const int block_size, const int t)
|
||||||
{
|
{
|
||||||
float2 a0, a1, a2, a3;
|
float2 a0, a1, a2, a3;
|
||||||
|
|
||||||
@ -258,7 +258,7 @@ void fft_radix4(__local float2* smem, __constant const float2* twiddles, const i
|
|||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((always_inline))
|
__attribute__((always_inline))
|
||||||
void fft_radix4_B2(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t)
|
void fft_radix4_B2(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t)
|
||||||
{
|
{
|
||||||
const int x2 = x1 + t/2;
|
const int x2 = x1 + t/2;
|
||||||
float2 a0, a1, a2, a3, a4, a5, a6, a7;
|
float2 a0, a1, a2, a3, a4, a5, a6, a7;
|
||||||
@ -281,7 +281,7 @@ void fft_radix4_B2(__local float2* smem, __constant const float2* twiddles, cons
|
|||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((always_inline))
|
__attribute__((always_inline))
|
||||||
void fft_radix4_B3(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t)
|
void fft_radix4_B3(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t)
|
||||||
{
|
{
|
||||||
const int x2 = x1 + t/3;
|
const int x2 = x1 + t/3;
|
||||||
const int x3 = x2 + t/3;
|
const int x3 = x2 + t/3;
|
||||||
@ -307,7 +307,7 @@ void fft_radix4_B3(__local float2* smem, __constant const float2* twiddles, cons
|
|||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((always_inline))
|
__attribute__((always_inline))
|
||||||
void fft_radix8(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t)
|
void fft_radix8(__local float2* smem, __global const float2* twiddles, const int x, const int block_size, const int t)
|
||||||
{
|
{
|
||||||
const int k = x % block_size;
|
const int k = x % block_size;
|
||||||
float2 a0, a1, a2, a3, a4, a5, a6, a7;
|
float2 a0, a1, a2, a3, a4, a5, a6, a7;
|
||||||
@ -370,7 +370,7 @@ void fft_radix8(__local float2* smem, __constant const float2* twiddles, const i
|
|||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((always_inline))
|
__attribute__((always_inline))
|
||||||
void fft_radix3(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t)
|
void fft_radix3(__local float2* smem, __global const float2* twiddles, const int x, const int block_size, const int t)
|
||||||
{
|
{
|
||||||
float2 a0, a1, a2;
|
float2 a0, a1, a2;
|
||||||
|
|
||||||
@ -388,7 +388,7 @@ void fft_radix3(__local float2* smem, __constant const float2* twiddles, const i
|
|||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((always_inline))
|
__attribute__((always_inline))
|
||||||
void fft_radix3_B2(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t)
|
void fft_radix3_B2(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t)
|
||||||
{
|
{
|
||||||
const int x2 = x1 + t/2;
|
const int x2 = x1 + t/2;
|
||||||
float2 a0, a1, a2, a3, a4, a5;
|
float2 a0, a1, a2, a3, a4, a5;
|
||||||
@ -411,7 +411,7 @@ void fft_radix3_B2(__local float2* smem, __constant const float2* twiddles, cons
|
|||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((always_inline))
|
__attribute__((always_inline))
|
||||||
void fft_radix3_B3(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t)
|
void fft_radix3_B3(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t)
|
||||||
{
|
{
|
||||||
const int x2 = x1 + t/3;
|
const int x2 = x1 + t/3;
|
||||||
const int x3 = x2 + t/3;
|
const int x3 = x2 + t/3;
|
||||||
@ -437,7 +437,7 @@ void fft_radix3_B3(__local float2* smem, __constant const float2* twiddles, cons
|
|||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((always_inline))
|
__attribute__((always_inline))
|
||||||
void fft_radix3_B4(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t)
|
void fft_radix3_B4(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t)
|
||||||
{
|
{
|
||||||
const int thread_block = t/4;
|
const int thread_block = t/4;
|
||||||
const int x2 = x1 + thread_block;
|
const int x2 = x1 + thread_block;
|
||||||
@ -467,7 +467,7 @@ void fft_radix3_B4(__local float2* smem, __constant const float2* twiddles, cons
|
|||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((always_inline))
|
__attribute__((always_inline))
|
||||||
void fft_radix5(__local float2* smem, __constant const float2* twiddles, const int x, const int block_size, const int t)
|
void fft_radix5(__local float2* smem, __global const float2* twiddles, const int x, const int block_size, const int t)
|
||||||
{
|
{
|
||||||
const int k = x % block_size;
|
const int k = x % block_size;
|
||||||
float2 a0, a1, a2, a3, a4;
|
float2 a0, a1, a2, a3, a4;
|
||||||
@ -486,7 +486,7 @@ void fft_radix5(__local float2* smem, __constant const float2* twiddles, const i
|
|||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((always_inline))
|
__attribute__((always_inline))
|
||||||
void fft_radix5_B2(__local float2* smem, __constant const float2* twiddles, const int x1, const int block_size, const int t)
|
void fft_radix5_B2(__local float2* smem, __global const float2* twiddles, const int x1, const int block_size, const int t)
|
||||||
{
|
{
|
||||||
const int x2 = x1+t/2;
|
const int x2 = x1+t/2;
|
||||||
float2 a0, a1, a2, a3, a4, a5, a6, a7, a8, a9;
|
float2 a0, a1, a2, a3, a4, a5, a6, a7, a8, a9;
|
||||||
@ -516,24 +516,23 @@ void fft_radix5_B2(__local float2* smem, __constant const float2* twiddles, cons
|
|||||||
|
|
||||||
__kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step, int src_offset, int src_rows, int src_cols,
|
__kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step, int src_offset, int src_rows, int src_cols,
|
||||||
__global uchar* dst_ptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
|
__global uchar* dst_ptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
|
||||||
__constant float2 * twiddles_ptr, const int t, const int nz)
|
__global float2* twiddles_ptr, const int t, const int nz)
|
||||||
{
|
{
|
||||||
const int x = get_global_id(0);
|
const int x = get_global_id(0);
|
||||||
const int y = get_group_id(1);
|
const int y = get_group_id(1);
|
||||||
|
const int block_size = LOCAL_SIZE/kercn;
|
||||||
if (y < nz)
|
if (y < nz)
|
||||||
{
|
{
|
||||||
__local float2 smem[LOCAL_SIZE];
|
__local float2 smem[LOCAL_SIZE];
|
||||||
__constant const float2* twiddles = (__constant float2*) twiddles_ptr;
|
__global const float2* twiddles = (__global float2*) twiddles_ptr;
|
||||||
const int ind = x;
|
const int ind = x;
|
||||||
const int block_size = LOCAL_SIZE/kercn;
|
|
||||||
#ifdef IS_1D
|
#ifdef IS_1D
|
||||||
float scale = 1.f/dst_cols;
|
float scale = 1.f/dst_cols;
|
||||||
#else
|
#else
|
||||||
float scale = 1.f/(dst_cols*dst_rows);
|
float scale = 1.f/(dst_cols*dst_rows);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef REAL_INPUT
|
#ifdef COMPLEX_INPUT
|
||||||
__global const float2* src = (__global const float2*)(src_ptr + mad24(y, src_step, mad24(x, (int)(sizeof(float)*2), src_offset)));
|
__global const float2* src = (__global const float2*)(src_ptr + mad24(y, src_step, mad24(x, (int)(sizeof(float)*2), src_offset)));
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int i=0; i<kercn; i++)
|
for (int i=0; i<kercn; i++)
|
||||||
@ -548,7 +547,7 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
|
|||||||
|
|
||||||
RADIX_PROCESS;
|
RADIX_PROCESS;
|
||||||
|
|
||||||
#ifndef REAL_OUTPUT
|
#ifdef COMPLEX_OUTPUT
|
||||||
#ifdef NO_CONJUGATE
|
#ifdef NO_CONJUGATE
|
||||||
// copy result without complex conjugate
|
// copy result without complex conjugate
|
||||||
const int cols = dst_cols/2 + 1;
|
const int cols = dst_cols/2 + 1;
|
||||||
@ -570,11 +569,18 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
|
|||||||
dst[0] = VAL(smem_1cn[0], scale);
|
dst[0] = VAL(smem_1cn[0], scale);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
__global float2* dst = (__global float2*)(dst_ptr + mad24(y, dst_step, dst_offset));
|
||||||
|
#pragma unroll
|
||||||
|
for (int i=x; i<dst_cols; i+=block_size)
|
||||||
|
dst[i] = (float2) 0.f;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step, int src_offset, int src_rows, int src_cols,
|
__kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step, int src_offset, int src_rows, int src_cols,
|
||||||
__global uchar* dst_ptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
|
__global uchar* dst_ptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
|
||||||
__constant float2 * twiddles_ptr, const int t, const int nz)
|
__global float2* twiddles_ptr, const int t, const int nz)
|
||||||
{
|
{
|
||||||
const int x = get_group_id(0);
|
const int x = get_group_id(0);
|
||||||
const int y = get_global_id(1);
|
const int y = get_global_id(1);
|
||||||
@ -583,7 +589,7 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
|
|||||||
{
|
{
|
||||||
__local float2 smem[LOCAL_SIZE];
|
__local float2 smem[LOCAL_SIZE];
|
||||||
__global const uchar* src = src_ptr + mad24(y, src_step, mad24(x, (int)(sizeof(float)*2), src_offset));
|
__global const uchar* src = src_ptr + mad24(y, src_step, mad24(x, (int)(sizeof(float)*2), src_offset));
|
||||||
__constant const float2* twiddles = (__constant float2*) twiddles_ptr;
|
__global const float2* twiddles = (__global float2*) twiddles_ptr;
|
||||||
const int ind = y;
|
const int ind = y;
|
||||||
const int block_size = LOCAL_SIZE/kercn;
|
const int block_size = LOCAL_SIZE/kercn;
|
||||||
float scale = 1.f/(dst_rows*dst_cols);
|
float scale = 1.f/(dst_rows*dst_cols);
|
||||||
@ -596,7 +602,7 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
|
|||||||
|
|
||||||
RADIX_PROCESS;
|
RADIX_PROCESS;
|
||||||
|
|
||||||
#ifndef REAL_OUTPUT
|
#ifdef COMPLEX_OUTPUT
|
||||||
__global uchar* dst = dst_ptr + mad24(y, dst_step, mad24(x, (int)(sizeof(float)*2), dst_offset));
|
__global uchar* dst = dst_ptr + mad24(y, dst_step, mad24(x, (int)(sizeof(float)*2), dst_offset));
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int i=0; i<kercn; i++)
|
for (int i=0; i<kercn; i++)
|
||||||
@ -633,21 +639,26 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step, int src_offset,
|
__kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step, int src_offset, int src_rows, int src_cols,
|
||||||
__global uchar* dst_ptr, int dst_step, int dst_offset,
|
__global uchar* dst_ptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
|
||||||
__constant float2 * twiddles_ptr, const int t, const int nz)
|
__global float2* twiddles_ptr, const int t, const int nz)
|
||||||
{
|
{
|
||||||
const int x = get_global_id(0);
|
const int x = get_global_id(0);
|
||||||
const int y = get_group_id(1);
|
const int y = get_group_id(1);
|
||||||
|
const int block_size = LOCAL_SIZE/kercn;
|
||||||
|
#ifdef IS_1D
|
||||||
|
const float scale = 1.f/dst_cols;
|
||||||
|
#else
|
||||||
|
const float scale = 1.f/(dst_cols*dst_rows);
|
||||||
|
#endif
|
||||||
|
|
||||||
if (y < nz)
|
if (y < nz)
|
||||||
{
|
{
|
||||||
__local float2 smem[LOCAL_SIZE];
|
__local float2 smem[LOCAL_SIZE];
|
||||||
__constant const float2* twiddles = (__constant float2*) twiddles_ptr;
|
__global const float2* twiddles = (__global float2*) twiddles_ptr;
|
||||||
const int ind = x;
|
const int ind = x;
|
||||||
const int block_size = LOCAL_SIZE/kercn;
|
|
||||||
|
|
||||||
#ifndef REAL
|
#if defined(COMPLEX_INPUT) && !defined(NO_CONJUGATE)
|
||||||
__global const float2* src = (__global const float2*)(src_ptr + mad24(y, src_step, mad24(x, (int)(sizeof(float)*2), src_offset)));
|
__global const float2* src = (__global const float2*)(src_ptr + mad24(y, src_step, mad24(x, (int)(sizeof(float)*2), src_offset)));
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int i=0; i<kercn; i++)
|
for (int i=0; i<kercn; i++)
|
||||||
@ -657,10 +668,10 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
|
|||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
__global const float2* src;
|
__global const float2* src;
|
||||||
#ifdef COMPLEX_INPUT
|
#if !defined(REAL_INPUT) && defined(NO_CONJUGATE)
|
||||||
src = (__global const float2*)(src_ptr + mad24(y, src_step, mad24(2, (int)sizeof(float), src_offset)));
|
src = (__global const float2*)(src_ptr + mad24(y, src_step, mad24(2, (int)sizeof(float), src_offset)));
|
||||||
#else
|
#else
|
||||||
src = (__global const float2*)(src_ptr + mad24(y, src_step, mad24(1, (int)sizeof(float), src_offset)));
|
src = (__global const float2*)(src_ptr + mad24(y, src_step, mad24(1, (int)sizeof(float), src_offset)));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
@ -688,39 +699,46 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
|
|||||||
RADIX_PROCESS;
|
RADIX_PROCESS;
|
||||||
|
|
||||||
// copy data to dst
|
// copy data to dst
|
||||||
#ifndef REAL
|
#ifdef COMPLEX_OUTPUT
|
||||||
__global float2* dst = (__global float*)(dst_ptr + mad24(y, dst_step, mad24(x, (int)(sizeof(float)*2), dst_offset)));
|
__global float2* dst = (__global float*)(dst_ptr + mad24(y, dst_step, mad24(x, (int)(sizeof(float)*2), dst_offset)));
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int i=0; i<kercn; i++)
|
for (int i=0; i<kercn; i++)
|
||||||
{
|
{
|
||||||
dst[i*block_size].x = smem[x + i*block_size].x;
|
dst[i*block_size].x = VAL(smem[x + i*block_size].x, scale);
|
||||||
dst[i*block_size].y = -smem[x + i*block_size].y;
|
dst[i*block_size].y = VAL(-smem[x + i*block_size].y, scale);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
__global float* dst = (__global float*)(dst_ptr + mad24(y, dst_step, mad24(x, (int)(sizeof(float)), dst_offset)));
|
__global float* dst = (__global float*)(dst_ptr + mad24(y, dst_step, mad24(x, (int)(sizeof(float)), dst_offset)));
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int i=0; i<kercn; i++)
|
for (int i=0; i<kercn; i++)
|
||||||
{
|
{
|
||||||
dst[i*block_size] = smem[x + i*block_size].x;
|
dst[i*block_size] = VAL(smem[x + i*block_size].x, scale);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
__global float2* dst = (__global float*)(dst_ptr + mad24(y, dst_step, mad24(x, (int)(sizeof(float)*2), dst_offset)));
|
||||||
|
#pragma unroll
|
||||||
|
for (int i=0; i<kercn; i++)
|
||||||
|
dst[i*block_size] = (float2) 0.f;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step, int src_offset,
|
__kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step, int src_offset, int src_rows, int src_cols,
|
||||||
__global uchar* dst_ptr, int dst_step, int dst_offset,
|
__global uchar* dst_ptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
|
||||||
__constant float2 * twiddles_ptr, const int t, const int nz)
|
__global float2* twiddles_ptr, const int t, const int nz)
|
||||||
{
|
{
|
||||||
const int x = get_group_id(0);
|
const int x = get_group_id(0);
|
||||||
const int y = get_global_id(1);
|
const int y = get_global_id(1);
|
||||||
|
|
||||||
#ifndef REAL
|
#ifdef COMPLEX_INPUT
|
||||||
if (x < nz)
|
if (x < nz)
|
||||||
{
|
{
|
||||||
__local float2 smem[LOCAL_SIZE];
|
__local float2 smem[LOCAL_SIZE];
|
||||||
__global const uchar* src = src_ptr + mad24(y, src_step, mad24(x, (int)(sizeof(float)*2), src_offset));
|
__global const uchar* src = src_ptr + mad24(y, src_step, mad24(x, (int)(sizeof(float)*2), src_offset));
|
||||||
__global uchar* dst = dst_ptr + mad24(y, dst_step, mad24(x, (int)(sizeof(float)*2), dst_offset));
|
__global uchar* dst = dst_ptr + mad24(y, dst_step, mad24(x, (int)(sizeof(float)*2), dst_offset));
|
||||||
__constant const float2* twiddles = (__constant float2*) twiddles_ptr;
|
__global const float2* twiddles = (__global float2*) twiddles_ptr;
|
||||||
const int ind = y;
|
const int ind = y;
|
||||||
const int block_size = LOCAL_SIZE/kercn;
|
const int block_size = LOCAL_SIZE/kercn;
|
||||||
|
|
||||||
@ -748,7 +766,7 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
|
|||||||
#else
|
#else
|
||||||
if (x < nz)
|
if (x < nz)
|
||||||
{
|
{
|
||||||
__constant const float2* twiddles = (__constant float2*) twiddles_ptr;
|
__global const float2* twiddles = (__global float2*) twiddles_ptr;
|
||||||
const int ind = y;
|
const int ind = y;
|
||||||
const int block_size = LOCAL_SIZE/kercn;
|
const int block_size = LOCAL_SIZE/kercn;
|
||||||
|
|
||||||
@ -756,7 +774,7 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
|
|||||||
#ifdef EVEN
|
#ifdef EVEN
|
||||||
if (x!=0 && (x!=(nz-1)))
|
if (x!=0 && (x!=(nz-1)))
|
||||||
#else
|
#else
|
||||||
if (x!=0)
|
if (x!=0)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
__global const uchar* src = src_ptr + mad24(y, src_step, mad24(2*x-1, (int)sizeof(float), src_offset));
|
__global const uchar* src = src_ptr + mad24(y, src_step, mad24(2*x-1, (int)sizeof(float), src_offset));
|
||||||
@ -800,7 +818,7 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
|
|||||||
RADIX_PROCESS;
|
RADIX_PROCESS;
|
||||||
|
|
||||||
// copy data to dst
|
// copy data to dst
|
||||||
__global uchar* dst = dst_ptr + mad24(y, dst_step, mad24(x, (int)(sizeof(float)), dst_offset));
|
__global uchar* dst = dst_ptr + mad24(y, dst_step, mad24(x, (int)(sizeof(float2)), dst_offset));
|
||||||
|
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int i=0; i<kercn; i++)
|
for (int i=0; i<kercn; i++)
|
||||||
|
@ -66,7 +66,7 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool)
|
|||||||
{
|
{
|
||||||
cv::Size dft_size;
|
cv::Size dft_size;
|
||||||
int dft_flags, depth, cn, dft_type;
|
int dft_flags, depth, cn, dft_type;
|
||||||
bool inplace;
|
bool hint;
|
||||||
bool is1d;
|
bool is1d;
|
||||||
|
|
||||||
TEST_DECLARE_INPUT_PARAMETER(src);
|
TEST_DECLARE_INPUT_PARAMETER(src);
|
||||||
@ -93,9 +93,7 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool)
|
|||||||
dft_flags |= cv::DFT_ROWS;
|
dft_flags |= cv::DFT_ROWS;
|
||||||
if (GET_PARAM(4))
|
if (GET_PARAM(4))
|
||||||
dft_flags |= cv::DFT_SCALE;
|
dft_flags |= cv::DFT_SCALE;
|
||||||
inplace = GET_PARAM(5);
|
hint = GET_PARAM(5);
|
||||||
|
|
||||||
|
|
||||||
is1d = (dft_flags & DFT_ROWS) != 0 || dft_size.height == 1;
|
is1d = (dft_flags & DFT_ROWS) != 0 || dft_size.height == 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -103,9 +101,6 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool)
|
|||||||
{
|
{
|
||||||
src = randomMat(dft_size, CV_MAKE_TYPE(depth, cn), 0.0, 100.0);
|
src = randomMat(dft_size, CV_MAKE_TYPE(depth, cn), 0.0, 100.0);
|
||||||
usrc = src.getUMat(ACCESS_READ);
|
usrc = src.getUMat(ACCESS_READ);
|
||||||
|
|
||||||
if (inplace)
|
|
||||||
dst = src, udst = usrc;
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -113,8 +108,9 @@ OCL_TEST_P(Dft, Mat)
|
|||||||
{
|
{
|
||||||
generateTestData();
|
generateTestData();
|
||||||
|
|
||||||
OCL_OFF(cv::dft(src, dst, dft_flags));
|
int nonzero_rows = hint ? src.cols - randomInt(1, src.rows-1) : 0;
|
||||||
OCL_ON(cv::dft(usrc, udst, dft_flags));
|
OCL_OFF(cv::dft(src, dst, dft_flags, nonzero_rows));
|
||||||
|
OCL_ON(cv::dft(usrc, udst, dft_flags, nonzero_rows));
|
||||||
|
|
||||||
if (dft_type == R2C && is1d && (dft_flags & cv::DFT_INVERSE) == 0)
|
if (dft_type == R2C && is1d && (dft_flags & cv::DFT_INVERSE) == 0)
|
||||||
{
|
{
|
||||||
@ -122,15 +118,16 @@ OCL_TEST_P(Dft, Mat)
|
|||||||
udst = udst(cv::Range(0, udst.rows), cv::Range(0, udst.cols/2 + 1));
|
udst = udst(cv::Range(0, udst.rows), cv::Range(0, udst.cols/2 + 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat gpu = udst.getMat(ACCESS_READ);
|
//Mat gpu = udst.getMat(ACCESS_READ);
|
||||||
std::cout << src << std::endl;
|
//std::cout << dst << std::endl;
|
||||||
std::cout << dst << std::endl;
|
//std::cout << gpu << std::endl;
|
||||||
std::cout << gpu << std::endl;
|
|
||||||
|
|
||||||
//int cn = udst.channels();
|
//int cn = udst.channels();
|
||||||
//
|
//
|
||||||
|
//Mat dst1ch = dst.reshape(1);
|
||||||
|
//Mat gpu1ch = gpu.reshape(1);
|
||||||
//Mat df;
|
//Mat df;
|
||||||
//absdiff(dst, gpu, df);
|
//absdiff(dst1ch, gpu1ch, df);
|
||||||
//std::cout << Mat_<int>(df) << std::endl;
|
//std::cout << Mat_<int>(df) << std::endl;
|
||||||
|
|
||||||
double eps = src.size().area() * 1e-4;
|
double eps = src.size().area() * 1e-4;
|
||||||
@ -188,13 +185,12 @@ OCL_TEST_P(MulSpectrums, Mat)
|
|||||||
|
|
||||||
OCL_INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MulSpectrums, testing::Combine(Bool(), Bool()));
|
OCL_INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MulSpectrums, testing::Combine(Bool(), Bool()));
|
||||||
|
|
||||||
OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(4, 1), cv::Size(5, 8), cv::Size(6, 6),
|
OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(10, 10), cv::Size(36, 36), cv::Size(512, 1), cv::Size(1280, 768)),
|
||||||
cv::Size(512, 1), cv::Size(1280, 768)),
|
Values((OCL_FFT_TYPE) R2C, (OCL_FFT_TYPE) C2C, (OCL_FFT_TYPE) R2R, (OCL_FFT_TYPE) C2R),
|
||||||
Values((OCL_FFT_TYPE) R2C, (OCL_FFT_TYPE) C2C, (OCL_FFT_TYPE) R2R, (OCL_FFT_TYPE) C2R),
|
|
||||||
Bool(), // DFT_INVERSE
|
Bool(), // DFT_INVERSE
|
||||||
Bool(), // DFT_ROWS
|
Bool(), // DFT_ROWS
|
||||||
Bool(), // DFT_SCALE
|
Bool(), // DFT_SCALE
|
||||||
Bool() // inplace
|
Bool() // hint
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user