Merge pull request #450 from bitwangyaoyao:2.4_dft

This commit is contained in:
Andrey Kamaev 2013-02-08 18:43:34 +04:00 committed by OpenCV Buildbot
commit 4f4fe553bc
2 changed files with 144 additions and 89 deletions

View File

@ -45,29 +45,28 @@
#include <iomanip> #include <iomanip>
#include "precomp.hpp" #include "precomp.hpp"
#ifdef HAVE_CLAMDFFT
using namespace cv; using namespace cv;
using namespace cv::ocl; using namespace cv::ocl;
using namespace std; using namespace std;
#if !defined HAVE_OPENCL #if !defined HAVE_OPENCL
void cv::ocl::dft(const oclMat &src, oclMat &dst, int flags) void cv::ocl::dft(const oclMat &, oclMat &, Size , int )
{ {
throw_nogpu(); throw_nogpu();
} }
#elif !defined HAVE_CLAMDFFT #elif !defined HAVE_CLAMDFFT
void cv::ocl::dft(const oclMat &src, oclMat &dst, int flags) void cv::ocl::dft(const oclMat&, oclMat&, Size, int)
{ {
CV_Error(CV_StsNotImplemented, "OpenCL DFT is not implemented"); CV_Error(CV_StsNotImplemented, "OpenCL DFT is not implemented");
} }
#else #else
#include <clAmdFft.h> #include "clAmdFft.h"
namespace cv namespace cv
{ {
namespace ocl namespace ocl
{ {
void fft_setup();
void fft_teardown();
enum FftType enum FftType
{ {
C2R = 1, // complex to complex C2R = 1, // complex to complex
@ -76,73 +75,94 @@ namespace cv
}; };
struct FftPlan struct FftPlan
{ {
friend void fft_setup();
friend void fft_teardown();
~FftPlan();
protected: protected:
clAmdFftPlanHandle plHandle;
FftPlan& operator=(const FftPlan&);
public:
FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type); FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
~FftPlan();
inline clAmdFftPlanHandle getPlanHandle() { return plHandle; }
const Size dft_size; const Size dft_size;
const int src_step, dst_step; const int src_step, dst_step;
const int flags; const int flags;
const FftType type; const FftType type;
clAmdFftPlanHandle plHandle; };
static vector<FftPlan *> planStore; class PlanCache
static bool started; {
static clAmdFftSetupData *setupData; protected:
PlanCache();
~PlanCache();
friend class auto_ptr<PlanCache>;
static auto_ptr<PlanCache> planCache;
bool started;
vector<FftPlan *> planStore;
clAmdFftSetupData *setupData;
public: public:
friend void fft_setup();
friend void fft_teardown();
// Returns the shared PlanCache held by the static planCache member,
// constructing it on the first call.
static PlanCache* getPlanCache()
{
    PlanCache* instance = planCache.get();
    if (instance == NULL)
    {
        // First use: create the cache and hand ownership to planCache.
        instance = new PlanCache();
        planCache.reset(instance);
    }
    return instance;
}
// return a baked plan-> // return a baked plan->
// if there is one matched plan, return it // if there is one matched plan, return it
// if not, bake a new one, put it into the planStore and return it. // if not, bake a new one, put it into the planStore and return it.
static clAmdFftPlanHandle getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type); static FftPlan* getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
// remove a single plan from the store
// return true if the plan is successfully removed
// else
static bool removePlan(clAmdFftPlanHandle );
}; };
} }
} }
bool cv::ocl::FftPlan::started = false; auto_ptr<PlanCache> PlanCache::planCache;
vector<cv::ocl::FftPlan *> cv::ocl::FftPlan::planStore = vector<cv::ocl::FftPlan *>();
clAmdFftSetupData *cv::ocl::FftPlan::setupData = 0;
void cv::ocl::fft_setup() void cv::ocl::fft_setup()
{ {
if(FftPlan::started) PlanCache& pCache = *PlanCache::getPlanCache();
if(pCache.started)
{ {
return; return;
} }
FftPlan::setupData = new clAmdFftSetupData; pCache.setupData = new clAmdFftSetupData;
openCLSafeCall(clAmdFftInitSetupData( FftPlan::setupData )); openCLSafeCall(clAmdFftInitSetupData( pCache.setupData ));
FftPlan::started = true; pCache.started = true;
} }
void cv::ocl::fft_teardown() void cv::ocl::fft_teardown()
{ {
if(!FftPlan::started) PlanCache& pCache = *PlanCache::getPlanCache();
if(!pCache.started)
{ {
return; return;
} }
delete FftPlan::setupData; delete pCache.setupData;
for(int i = 0; i < FftPlan::planStore.size(); i ++) for(size_t i = 0; i < pCache.planStore.size(); i ++)
{ {
delete FftPlan::planStore[i]; delete pCache.planStore[i];
} }
FftPlan::planStore.clear(); pCache.planStore.clear();
openCLSafeCall( clAmdFftTeardown( ) ); openCLSafeCall( clAmdFftTeardown( ) );
FftPlan::started = false; pCache.started = false;
} }
// bake a new plan // bake a new plan
cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type) cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type)
: dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), flags(_flags), type(_type), plHandle(0) : plHandle(0), dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), flags(_flags), type(_type)
{ {
if(!FftPlan::started) fft_setup();
{
// implicitly do fft setup
fft_setup();
}
bool is_1d_input = (_dft_size.height == 1); bool is_1d_input = (_dft_size.height == 1);
int is_row_dft = flags & DFT_ROWS; int is_row_dft = flags & DFT_ROWS;
int is_scaled_dft = flags & DFT_SCALE; int is_scaled_dft = flags & DFT_SCALE;
int is_inverse = flags & DFT_INVERSE; int is_inverse = flags & DFT_INVERSE;
clAmdFftResultLocation place; //clAmdFftResultLocation place;
clAmdFftLayout inLayout; clAmdFftLayout inLayout;
clAmdFftLayout outLayout; clAmdFftLayout outLayout;
clAmdFftDim dim = is_1d_input || is_row_dft ? CLFFT_1D : CLFFT_2D; clAmdFftDim dim = is_1d_input || is_row_dft ? CLFFT_1D : CLFFT_2D;
@ -150,7 +170,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
size_t batchSize = is_row_dft ? dft_size.height : 1; size_t batchSize = is_row_dft ? dft_size.height : 1;
size_t clLengthsIn[ 3 ] = {1, 1, 1}; size_t clLengthsIn[ 3 ] = {1, 1, 1};
size_t clStridesIn[ 3 ] = {1, 1, 1}; size_t clStridesIn[ 3 ] = {1, 1, 1};
size_t clLengthsOut[ 3 ] = {1, 1, 1}; //size_t clLengthsOut[ 3 ] = {1, 1, 1};
size_t clStridesOut[ 3 ] = {1, 1, 1}; size_t clStridesOut[ 3 ] = {1, 1, 1};
clLengthsIn[0] = dft_size.width; clLengthsIn[0] = dft_size.width;
clLengthsIn[1] = is_row_dft ? 1 : dft_size.height; clLengthsIn[1] = is_row_dft ? 1 : dft_size.height;
@ -166,14 +186,12 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
clStridesOut[1] = clStridesIn[1]; clStridesOut[1] = clStridesIn[1];
break; break;
case R2C: case R2C:
CV_Assert(!is_row_dft); // this is not supported yet
inLayout = CLFFT_REAL; inLayout = CLFFT_REAL;
outLayout = CLFFT_HERMITIAN_INTERLEAVED; outLayout = CLFFT_HERMITIAN_INTERLEAVED;
clStridesIn[1] = src_step / sizeof(float); clStridesIn[1] = src_step / sizeof(float);
clStridesOut[1] = dst_step / sizeof(std::complex<float>); clStridesOut[1] = dst_step / sizeof(std::complex<float>);
break; break;
case C2R: case C2R:
CV_Assert(!is_row_dft); // this is not supported yet
inLayout = CLFFT_HERMITIAN_INTERLEAVED; inLayout = CLFFT_HERMITIAN_INTERLEAVED;
outLayout = CLFFT_REAL; outLayout = CLFFT_REAL;
clStridesIn[1] = src_step / sizeof(std::complex<float>); clStridesIn[1] = src_step / sizeof(std::complex<float>);
@ -197,27 +215,39 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
openCLSafeCall( clAmdFftSetPlanInStride ( plHandle, dim, clStridesIn ) ); openCLSafeCall( clAmdFftSetPlanInStride ( plHandle, dim, clStridesIn ) );
openCLSafeCall( clAmdFftSetPlanOutStride ( plHandle, dim, clStridesOut ) ); openCLSafeCall( clAmdFftSetPlanOutStride ( plHandle, dim, clStridesOut ) );
openCLSafeCall( clAmdFftSetPlanDistance ( plHandle, clStridesIn[ dim ], clStridesIn[ dim ]) ); openCLSafeCall( clAmdFftSetPlanDistance ( plHandle, clStridesIn[ dim ], clStridesOut[ dim ]) );
float scale_ = is_scaled_dft ? 1.f / _dft_size.area() : 1.f;
openCLSafeCall( clAmdFftSetPlanScale ( plHandle, is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale_ ) );
//ready to bake
openCLSafeCall( clAmdFftBakePlan( plHandle, 1, &(Context::getContext()->impl->clCmdQueue), NULL, NULL ) ); openCLSafeCall( clAmdFftBakePlan( plHandle, 1, &(Context::getContext()->impl->clCmdQueue), NULL, NULL ) );
} }
cv::ocl::FftPlan::~FftPlan() cv::ocl::FftPlan::~FftPlan()
{ {
for(int i = 0; i < planStore.size(); i ++)
{
if(planStore[i]->plHandle == plHandle)
{
planStore.erase(planStore.begin() + i);
}
}
openCLSafeCall( clAmdFftDestroyPlan( &plHandle ) ); openCLSafeCall( clAmdFftDestroyPlan( &plHandle ) );
} }
clAmdFftPlanHandle cv::ocl::FftPlan::getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type) cv::ocl::PlanCache::PlanCache()
: started(false),
planStore(vector<cv::ocl::FftPlan *>()),
setupData(NULL)
{ {
}
// Destructor: delegates all cleanup to fft_teardown().
// NOTE(review): fft_teardown() looks the cache up again through
// PlanCache::getPlanCache() while this object is already being destroyed;
// presumably the owning auto_ptr still reports this instance at that point --
// confirm against the standard library in use.
cv::ocl::PlanCache::~PlanCache()
{
fft_teardown();
}
FftPlan* cv::ocl::PlanCache::getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type)
{
PlanCache& pCache = *PlanCache::getPlanCache();
vector<FftPlan *>& pStore = pCache.planStore;
// go through search // go through search
for(int i = 0; i < planStore.size(); i ++) for(size_t i = 0; i < pStore.size(); i ++)
{ {
FftPlan *plan = planStore[i]; FftPlan *plan = pStore[i];
if( if(
plan->dft_size.width == _dft_size.width && plan->dft_size.width == _dft_size.width &&
plan->dft_size.height == _dft_size.height && plan->dft_size.height == _dft_size.height &&
@ -225,15 +255,31 @@ clAmdFftPlanHandle cv::ocl::FftPlan::getPlan(Size _dft_size, int _src_step, int
plan->src_step == _src_step && plan->src_step == _src_step &&
plan->dst_step == _dst_step && plan->dst_step == _dst_step &&
plan->type == _type plan->type == _type
) )
{ {
return plan->plHandle; return plan;
} }
} }
// no baked plan is found // no baked plan is found
FftPlan *newPlan = new FftPlan(_dft_size, _src_step, _dst_step, _flags, _type); FftPlan *newPlan = new FftPlan(_dft_size, _src_step, _dst_step, _flags, _type);
planStore.push_back(newPlan); pStore.push_back(newPlan);
return newPlan->plHandle; return newPlan;
}
// Removes the cached plan whose handle equals plHandle and destroys it.
//
// plHandle: handle of the plan to remove, as returned by FftPlan::getPlanHandle().
// Returns true if a matching plan was found, removed from the store and
// deleted; false if no cached plan has that handle.
bool cv::ocl::PlanCache::removePlan(clAmdFftPlanHandle plHandle)
{
    PlanCache& pCache = *PlanCache::getPlanCache();
    vector<FftPlan *>& pStore = pCache.planStore;
    for(size_t i = 0; i < pStore.size(); i ++)
    {
        if(pStore[i]->getPlanHandle() == plHandle)
        {
            // Take the pointer out BEFORE erasing: after erase() index i
            // refers to the next plan (or is past the end), so deleting
            // pStore[i] afterwards would destroy the wrong plan or read out
            // of bounds, and the removed plan would leak.
            FftPlan *plan = pStore[i];
            pStore.erase(pStore.begin() + i);
            delete plan;
            return true;
        }
    }
    return false;
}
void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags) void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
@ -245,19 +291,20 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
// check if the given dft size is of optimal dft size // check if the given dft size is of optimal dft size
CV_Assert(dft_size.area() == getOptimalDFTSize(dft_size.area())); CV_Assert(dft_size.area() == getOptimalDFTSize(dft_size.area()));
// the two flags are not compatible
CV_Assert( !((flags & DFT_SCALE) && (flags & DFT_ROWS)) );
// similar assertions with cuda module // similar assertions with cuda module
CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2); CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2);
// we don't support DFT_SCALE flag //bool is_1d_input = (src.rows == 1);
CV_Assert(!(DFT_SCALE & flags)); //int is_row_dft = flags & DFT_ROWS;
//int is_scaled_dft = flags & DFT_SCALE;
bool is_1d_input = (src.rows == 1);
int is_row_dft = flags & DFT_ROWS;
int is_scaled_dft = flags & DFT_SCALE;
int is_inverse = flags & DFT_INVERSE; int is_inverse = flags & DFT_INVERSE;
bool is_complex_input = src.channels() == 2; bool is_complex_input = src.channels() == 2;
bool is_complex_output = !(flags & DFT_REAL_OUTPUT); bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
// We don't support real-to-real transform // We don't support real-to-real transform
CV_Assert(is_complex_input || is_complex_output); CV_Assert(is_complex_input || is_complex_output);
FftType type = (FftType)(is_complex_input << 0 | is_complex_output << 1); FftType type = (FftType)(is_complex_input << 0 | is_complex_output << 1);
@ -268,12 +315,10 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
dst.create(src.rows, src.cols, CV_32FC2); dst.create(src.rows, src.cols, CV_32FC2);
break; break;
case R2C: case R2C:
CV_Assert(!is_row_dft); // this is not supported yet
dst.create(src.rows, src.cols / 2 + 1, CV_32FC2); dst.create(src.rows, src.cols / 2 + 1, CV_32FC2);
break; break;
case C2R: case C2R:
CV_Assert(dft_size.width / 2 + 1 == src.cols && dft_size.height == src.rows); CV_Assert(dft_size.width / 2 + 1 == src.cols && dft_size.height == src.rows);
CV_Assert(!is_row_dft); // this is not supported yet
dst.create(src.rows, dft_size.width, CV_32FC1); dst.create(src.rows, dft_size.width, CV_32FC1);
break; break;
default: default:
@ -282,13 +327,14 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
throw exception(); throw exception();
break; break;
} }
clAmdFftPlanHandle plHandle = FftPlan::getPlan(dft_size, src.step, dst.step, flags, type); clAmdFftPlanHandle plHandle = PlanCache::getPlan(dft_size, src.step, dst.step, flags, type)->getPlanHandle();
//get the buffersize //get the buffersize
size_t buffersize = 0; size_t buffersize = 0;
openCLSafeCall( clAmdFftGetTmpBufSize(plHandle, &buffersize ) ); openCLSafeCall( clAmdFftGetTmpBufSize(plHandle, &buffersize ) );
//allocate the intermediate buffer //allocate the intermediate buffer
// TODO, bind this with the current FftPlan
cl_mem clMedBuffer = NULL; cl_mem clMedBuffer = NULL;
if (buffersize) if (buffersize)
{ {
@ -297,17 +343,17 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
openCLSafeCall( medstatus ); openCLSafeCall( medstatus );
} }
openCLSafeCall( clAmdFftEnqueueTransform( plHandle, openCLSafeCall( clAmdFftEnqueueTransform( plHandle,
is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
1, 1,
&src.clCxt->impl->clCmdQueue, &src.clCxt->impl->clCmdQueue,
0, NULL, NULL, 0, NULL, NULL,
(cl_mem *)&src.data, (cl_mem *)&dst.data, clMedBuffer ) ); (cl_mem *)&src.data, (cl_mem *)&dst.data, clMedBuffer ) );
openCLSafeCall( clFinish(src.clCxt->impl->clCmdQueue) ); openCLSafeCall( clFinish(src.clCxt->impl->clCmdQueue) );
if(clMedBuffer) if(clMedBuffer)
{ {
openCLFree(clMedBuffer); openCLFree(clMedBuffer);
} }
//fft_teardown();
} }
#endif #endif
#endif //HAVE_CLAMDFFT

View File

@ -48,50 +48,59 @@ using namespace std;
#ifdef HAVE_CLAMDFFT #ifdef HAVE_CLAMDFFT
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
// Dft // Dft
PARAM_TEST_CASE(Dft, cv::Size, bool) PARAM_TEST_CASE(Dft, cv::Size, int)
{ {
cv::Size dft_size; cv::Size dft_size;
bool dft_rows; int dft_flags;
//std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp() virtual void SetUp()
{ {
//int devnums = getDevice(oclinfo); dft_size = GET_PARAM(0);
// CV_Assert(devnums > 0); dft_flags = GET_PARAM(1);
dft_size = GET_PARAM(0);
dft_rows = GET_PARAM(1);
} }
}; };
TEST_P(Dft, C2C) TEST_P(Dft, C2C)
{ {
cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0); cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 100.0);
cv::Mat b_gold; cv::Mat b_gold;
int flags = 0;
flags |= dft_rows ? cv::DFT_ROWS : 0;
cv::ocl::oclMat d_b; cv::ocl::oclMat d_b;
cv::dft(a, b_gold, flags); cv::dft(a, b_gold, dft_flags);
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags); cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), dft_flags);
EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), a.size().area() * 1e-4, ""); EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), a.size().area() * 1e-4, "");
} }
// Real-to-complex forward DFT: runs cv::ocl::dft on a random single-channel
// float matrix and compares it with the CPU cv::dft result computed with
// DFT_COMPLEX_OUTPUT and the same parameterized flags.
TEST_P(Dft, R2C)
{
cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 100.0);
cv::Mat b_gold, b_gold_roi;
// NOTE(review): d_c is declared but never used in this test.
cv::ocl::oclMat d_b, d_c;
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), dft_flags);
cv::dft(a, b_gold, cv::DFT_COMPLEX_OUTPUT | dft_flags);
// Crop the CPU reference to the dimensions of the OpenCL output before comparing.
b_gold_roi = b_gold(cv::Rect(0, 0, d_b.cols, d_b.rows));
// Tolerance grows with the number of elements in the input.
EXPECT_MAT_NEAR(b_gold_roi, cv::Mat(d_b), a.size().area() * 1e-4, "");
cv::Mat c_gold;
// NOTE(review): c_gold (CPU inverse of the reference) is computed but never
// compared with anything, and the assertion below repeats the one above
// verbatim. Presumably an inverse / round-trip check was intended -- confirm.
cv::dft(b_gold, c_gold, cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);
EXPECT_MAT_NEAR(b_gold_roi, cv::Mat(d_b), a.size().area() * 1e-4, "");
}
TEST_P(Dft, R2CthenC2R) TEST_P(Dft, R2CthenC2R)
{ {
cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0); cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
int flags = 0;
//flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet
cv::ocl::oclMat d_b, d_c; cv::ocl::oclMat d_b, d_c;
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags); cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), 0);
cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT); cv::ocl::dft(d_b, d_c, a.size(), cv::DFT_SCALE | cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT);
EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, ""); EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, "");
} }
INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine(
testing::Values(cv::Size(5, 4), cv::Size(20, 20)), INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Dft, testing::Combine(
testing::Values(false, true))); testing::Values(cv::Size(2, 3), cv::Size(5, 4), cv::Size(25, 20), cv::Size(512, 1), cv::Size(1024, 768)),
testing::Values(0, (int)cv::DFT_ROWS, (int)cv::DFT_SCALE) ));
#endif // HAVE_CLAMDFFT #endif // HAVE_CLAMDFFT