Added packing to CCS format

This commit is contained in:
Alexander Karsakov
2014-07-17 12:31:41 +04:00
parent ed07241f89
commit 6c8b6bd0c7
3 changed files with 82 additions and 101 deletions

View File

@@ -2083,20 +2083,19 @@ struct OCL_FftPlan
int dft_size;
int flags;
OCL_FftPlan(int _size, int _flags): dft_size(_size), flags(_flags)
bool status;
OCL_FftPlan(int _size, int _flags): dft_size(_size), flags(_flags), status(true)
{
int min_radix = INT_MAX;
std::vector<int> radixes, blocks;
ocl_getRadixes(dft_size, radixes, blocks, min_radix);
thread_count = (dft_size + min_radix-1) / min_radix;
printf("cols: %d - ", dft_size);
for (int i=0; i<radixes.size(); i++)
if (thread_count > ocl::Device::getDefault().maxWorkGroupSize())
{
printf("%d ", radixes[i]);
status = false;
return;
}
printf("min radix - %d\n", min_radix);
// generate string with radix calls
String radix_processing;
@@ -2142,6 +2141,9 @@ struct OCL_FftPlan
bool enqueueTransform(InputArray _src, OutputArray _dst, int dft_size, int flags, bool rows = true) const
{
if (!status)
return false;
UMat src = _src.getUMat();
UMat dst = _dst.getUMat();
@@ -2162,11 +2164,14 @@ struct OCL_FftPlan
kernel_name = "fft_multi_radix_cols";
}
bool is1d = (flags & DFT_ROWS) != 0 || dft_size == 1;
String options = buildOptions;
if (src.channels() == 1)
options += " -D REAL_INPUT";
if (dst.channels() == 1)
options += " -D CCS_OUTPUT";
if ((is1d && src.channels() == 1) || (rows && (flags & DFT_REAL_OUTPUT)))
options += " -D NO_CONJUGATE";
ocl::Kernel k(kernel_name.c_str(), ocl::core::fft_oclsrc, options);
if (k.empty())
@@ -2219,61 +2224,16 @@ protected:
std::vector<OCL_FftPlan*> planStorage;
};
static bool ocl_packToCCS(InputArray _src, OutputArray _dst, int flags)
{
UMat src = _src.getUMat();
_dst.create(src.size(), CV_32F);
UMat dst = _dst.getUMat();
src = src.reshape(1);
if ((flags & DFT_ROWS) == 0 && src.rows > 1)
{
// pack to CCS by rows
if (dst.cols > 2)
src.colRange(2, dst.cols + (dst.cols % 2)).copyTo(dst.colRange(1, dst.cols-1 + (dst.cols % 2)));
Mat dst_mat = dst.getMat(ACCESS_WRITE);
Mat buffer_mat = src.getMat(ACCESS_READ);
dst_mat.at<float>(0,0) = buffer_mat.at<float>(0,0);
dst_mat.at<float>(dst_mat.rows-1,0) = buffer_mat.at<float>(src.rows/2,0);
for (int i=1; i<dst_mat.rows-1; i+=2)
{
dst_mat.at<float>(i,0) = buffer_mat.at<float>((i+1)/2,0);
dst_mat.at<float>(i+1,0) = buffer_mat.at<float>((i+1)/2,1);
}
if (dst_mat.cols % 2 == 0)
{
dst_mat.at<float>(0,dst_mat.cols-1) = buffer_mat.at<float>(0,src.cols/2);
dst_mat.at<float>(dst_mat.rows-1,dst_mat.cols-1) = buffer_mat.at<float>(src.rows/2,src.cols/2);
for (int i=1; i<dst_mat.rows-1; i+=2)
{
dst_mat.at<float>(i,dst_mat.cols-1) = buffer_mat.at<float>((i+1)/2,src.cols/2);
dst_mat.at<float>(i+1,dst_mat.cols-1) = buffer_mat.at<float>((i+1)/2,src.cols/2+1);
}
}
}
else
{
// pack to CCS each row
src.colRange(0,1).copyTo(dst.colRange(0,1));
src.colRange(2, (dst.cols+1)).copyTo(dst.colRange(1, dst.cols));
}
return true;
}
static bool ocl_dft_C2C_rows(InputArray _src, OutputArray _dst, int nonzero_rows, int flags)
{
const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.cols(), flags);
return plan->enqueueTransform(_src, _dst, nonzero_rows, flags, true);
}
static bool ocl_dft_C2C_cols(InputArray _src, OutputArray _dst, int flags)
static bool ocl_dft_C2C_cols(InputArray _src, OutputArray _dst, int nonzero_cols, int flags)
{
const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.rows(), flags);
return plan->enqueueTransform(_src, _dst, _src.cols(), flags, false);
return plan->enqueueTransform(_src, _dst, nonzero_cols, flags, false);
}
static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_rows)
@@ -2315,6 +2275,8 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
// Forward Complex to CCS not supported
if (complex_input && real_output && !inv)
{
flags ^= DFT_REAL_OUTPUT;
flags |= DFT_COMPLEX_OUTPUT;
real_output = 0;
complex_output = 1;
}
@@ -2344,23 +2306,21 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
if (complex_output)
{
if (real_input && is1d && !inv)
output.create(src.size(), CV_32FC2);
else
{
_dst.create(src.size(), CV_32FC2);
output = _dst.getUMat();
}
} else
_dst.create(src.size(), CV_32FC2);
output = _dst.getUMat();
}
else
{
// CCS
if (is1d)
{
_dst.create(src.size(), CV_32FC1);
output = _dst.getUMat();
}
else
{
_dst.create(src.size(), CV_32FC1);
output.create(src.size(), CV_32FC2);
}
}
if (!ocl_dft_C2C_rows(input, output, nonzero_rows, flags))
@@ -2368,32 +2328,13 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
if (!is1d)
{
if (!ocl_dft_C2C_cols(output, output, flags))
int nonzero_cols = real_input && real_output ? output.cols/2 + 1 : output.cols;
if (!ocl_dft_C2C_cols(output, _dst, nonzero_cols, flags))
return false;
}
if (complex_output)
} else
{
if (real_input && is1d && !inv)
_dst.assign(output.colRange(0, output.cols/2+1));
else
_dst.assign(output);
_dst.assign(output);
}
else
{
if (!inv)
{
if (!is1d)
ocl_packToCCS(output, _dst, flags);
else
_dst.assign(output);
}
else
{
// copy real part to dst
}
}
//printf("OCL!\n");
return true;
}
@@ -2435,7 +2376,6 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
int elem_size = (int)src.elemSize1(), complex_elem_size = elem_size*2;
int factors[34];
bool inplace_transform = false;
bool is1d = (flags & DFT_ROWS) != 0 || src.rows == 1;
#ifdef USE_IPP_DFT
AutoBuffer<uchar> ippbuf;
int ipp_norm_flag = !(flags & DFT_SCALE) ? 8 : inv ? 2 : 1;
@@ -2444,10 +2384,7 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
if( !inv && src.channels() == 1 && (flags & DFT_COMPLEX_OUTPUT) )
if (!is1d)
_dst.create( src.size(), CV_MAKETYPE(depth, 2) );
else
_dst.create( Size(src.cols/2+1, src.rows), CV_MAKETYPE(depth, 2) );
_dst.create( src.size(), CV_MAKETYPE(depth, 2) );
else if( inv && src.channels() == 2 && (flags & DFT_REAL_OUTPUT) )
_dst.create( src.size(), depth );
else