Fix some bugs in the use of the AmdFft library

This commit is contained in:
yao 2013-02-08 10:46:43 +08:00
parent db9de43fa5
commit 4d6827212d
2 changed files with 144 additions and 89 deletions

View File

@ -45,29 +45,28 @@
#include <iomanip>
#include "precomp.hpp"
#ifdef HAVE_CLAMDFFT
using namespace cv;
using namespace cv::ocl;
using namespace std;
#if !defined HAVE_OPENCL
// Fallback used when HAVE_OPENCL is not defined: the call only forwards to throw_nogpu().
// NOTE(review): two signatures are listed back to back (a 3-argument one and a
// 4-argument one taking a Size); this looks like the before/after pair of a rendered
// diff — confirm which one is current before compiling.
void cv::ocl::dft(const oclMat &src, oclMat &dst, int flags)
void cv::ocl::dft(const oclMat &, oclMat &, Size , int )
{
throw_nogpu();
}
#elif !defined HAVE_CLAMDFFT
// Fallback used when HAVE_OPENCL is defined but HAVE_CLAMDFFT is not: reports
// CV_StsNotImplemented through CV_Error.
// NOTE(review): two signatures are listed back to back (a 3-argument one and a
// 4-argument one taking a Size); this looks like the before/after pair of a rendered
// diff — confirm which one is current before compiling.
void cv::ocl::dft(const oclMat &src, oclMat &dst, int flags)
void cv::ocl::dft(const oclMat&, oclMat&, Size, int)
{
CV_Error(CV_StsNotImplemented, "OpenCL DFT is not implemented");
}
#else
#include <clAmdFft.h>
#include "clAmdFft.h"
namespace cv
{
namespace ocl
{
void fft_setup();
void fft_teardown();
enum FftType
{
C2R = 1, // complex to real: complex input (bit 0 set), real output (bit 1 clear)
@ -76,73 +75,94 @@ namespace cv
};
// Wraps one clAmdFft plan handle together with the parameters the plan was built for.
// NOTE(review): the destructor and plHandle are each declared twice below, and both the
// friend declarations and the static members appear next to the PlanCache-based
// design; this looks like before/after lines of a rendered diff — confirm which
// declarations are current.
struct FftPlan
{
friend void fft_setup();
friend void fft_teardown();
~FftPlan();
protected:
// Underlying plan handle; set to 0 in the constructor's initializer list.
clAmdFftPlanHandle plHandle;
// Declared only; no definition is visible in this chunk — presumably to forbid
// assignment (the data members are const). TODO confirm.
FftPlan& operator=(const FftPlan&);
public:
// Creates and bakes a plan for the given size, steps, flags and transform type.
FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
// Destroys the underlying plan via clAmdFftDestroyPlan.
~FftPlan();
// Accessor for the underlying plan handle.
inline clAmdFftPlanHandle getPlanHandle() { return plHandle; }
// Parameters the plan was created with; the plan lookup compares against these
// to decide whether a stored plan can be reused.
const Size dft_size;
const int src_step, dst_step;
const int flags;
const FftType type;
clAmdFftPlanHandle plHandle;
static vector<FftPlan *> planStore;
static bool started;
static clAmdFftSetupData *setupData;
};
// Lazily created single instance that holds the clAmdFft setup data and the list of
// baked plans; fft_setup() and fft_teardown() manage its contents.
class PlanCache
{
protected:
// Construction and destruction are restricted; the instance is obtained through
// getPlanCache().
PlanCache();
~PlanCache();
// Presumably needed so that auto_ptr can call the protected destructor — confirm.
friend class auto_ptr<PlanCache>;
// Holder of the single instance; empty until getPlanCache() is first called.
static auto_ptr<PlanCache> planCache;
// Set by fft_setup() and cleared by fft_teardown().
bool started;
// Plans created by getPlan(); the pointed-to plans are deleted in fft_teardown().
vector<FftPlan *> planStore;
// Allocated in fft_setup() and deleted in fft_teardown().
clAmdFftSetupData *setupData;
public:
friend void fft_setup();
friend void fft_teardown();
// Returns the single PlanCache instance, creating it on first use.
static PlanCache* getPlanCache()
{
if( NULL == planCache.get())
planCache.reset(new PlanCache());
return planCache.get();
}
// Returns a baked plan:
// if a stored plan matches the requested parameters, that plan is returned;
// otherwise a new plan is baked, put into the planStore and returned.
// NOTE(review): two getPlan declarations that differ only in return type follow;
// this looks like the before/after pair of a rendered diff — confirm which is current.
static clAmdFftPlanHandle getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
static FftPlan* getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
// Removes a single plan, identified by its handle, from the store.
// Returns true if the plan was found and removed, false otherwise.
static bool removePlan(clAmdFftPlanHandle );
};
}
}
// Out-of-class definitions of the static data members.
// NOTE(review): the FftPlan statics and the PlanCache::planCache holder are both defined
// here; this looks like before/after lines of a rendered diff — confirm which set is current.
bool cv::ocl::FftPlan::started = false;
vector<cv::ocl::FftPlan *> cv::ocl::FftPlan::planStore = vector<cv::ocl::FftPlan *>();
clAmdFftSetupData *cv::ocl::FftPlan::setupData = 0;
// Starts out empty; PlanCache::getPlanCache() fills it on first use.
auto_ptr<PlanCache> PlanCache::planCache;
// One-time initialisation: allocates a clAmdFftSetupData, fills it in with
// clAmdFftInitSetupData and sets the started flag. Returns immediately when the
// started flag is already set.
// NOTE(review): statements using FftPlan::* and pCache.* are interleaved below; this looks
// like before/after lines of a rendered diff — confirm which set is current.
// NOTE(review): no call that hands the setup data to the library (e.g. clAmdFftSetup) is
// visible in this chunk — confirm whether one is required.
void cv::ocl::fft_setup()
{
if(FftPlan::started)
PlanCache& pCache = *PlanCache::getPlanCache();
if(pCache.started)
{
return;
}
FftPlan::setupData = new clAmdFftSetupData;
openCLSafeCall(clAmdFftInitSetupData( FftPlan::setupData ));
FftPlan::started = true;
pCache.setupData = new clAmdFftSetupData;
openCLSafeCall(clAmdFftInitSetupData( pCache.setupData ));
pCache.started = true;
}
// Releases what fft_setup() and the plan lookup created: deletes the setup data and
// every stored plan, empties the plan store, calls clAmdFftTeardown() and clears the
// started flag. Returns immediately when the started flag is not set.
// NOTE(review): statements using FftPlan::* and pCache.* are interleaved below; this looks
// like before/after lines of a rendered diff — confirm which set is current.
void cv::ocl::fft_teardown()
{
if(!FftPlan::started)
PlanCache& pCache = *PlanCache::getPlanCache();
if(!pCache.started)
{
return;
}
delete FftPlan::setupData;
for(int i = 0; i < FftPlan::planStore.size(); i ++)
delete pCache.setupData;
for(size_t i = 0; i < pCache.planStore.size(); i ++)
{
delete FftPlan::planStore[i];
delete pCache.planStore[i];
}
FftPlan::planStore.clear();
pCache.planStore.clear();
openCLSafeCall( clAmdFftTeardown( ) );
FftPlan::started = false;
pCache.started = false;
}
// bake a new plan
cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type)
: dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), flags(_flags), type(_type), plHandle(0)
: plHandle(0), dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), flags(_flags), type(_type)
{
if(!FftPlan::started)
{
// implicitly do fft setup
fft_setup();
}
fft_setup();
bool is_1d_input = (_dft_size.height == 1);
int is_row_dft = flags & DFT_ROWS;
int is_scaled_dft = flags & DFT_SCALE;
int is_inverse = flags & DFT_INVERSE;
int is_scaled_dft = flags & DFT_SCALE;
int is_inverse = flags & DFT_INVERSE;
clAmdFftResultLocation place;
//clAmdFftResultLocation place;
clAmdFftLayout inLayout;
clAmdFftLayout outLayout;
clAmdFftDim dim = is_1d_input || is_row_dft ? CLFFT_1D : CLFFT_2D;
@ -150,7 +170,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
size_t batchSize = is_row_dft ? dft_size.height : 1;
size_t clLengthsIn[ 3 ] = {1, 1, 1};
size_t clStridesIn[ 3 ] = {1, 1, 1};
size_t clLengthsOut[ 3 ] = {1, 1, 1};
//size_t clLengthsOut[ 3 ] = {1, 1, 1};
size_t clStridesOut[ 3 ] = {1, 1, 1};
clLengthsIn[0] = dft_size.width;
clLengthsIn[1] = is_row_dft ? 1 : dft_size.height;
@ -166,14 +186,12 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
clStridesOut[1] = clStridesIn[1];
break;
case R2C:
CV_Assert(!is_row_dft); // this is not supported yet
inLayout = CLFFT_REAL;
outLayout = CLFFT_HERMITIAN_INTERLEAVED;
clStridesIn[1] = src_step / sizeof(float);
clStridesOut[1] = dst_step / sizeof(std::complex<float>);
break;
case C2R:
CV_Assert(!is_row_dft); // this is not supported yet
inLayout = CLFFT_HERMITIAN_INTERLEAVED;
outLayout = CLFFT_REAL;
clStridesIn[1] = src_step / sizeof(std::complex<float>);
@ -197,27 +215,39 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
openCLSafeCall( clAmdFftSetPlanInStride ( plHandle, dim, clStridesIn ) );
openCLSafeCall( clAmdFftSetPlanOutStride ( plHandle, dim, clStridesOut ) );
openCLSafeCall( clAmdFftSetPlanDistance ( plHandle, clStridesIn[ dim ], clStridesIn[ dim ]) );
openCLSafeCall( clAmdFftSetPlanDistance ( plHandle, clStridesIn[ dim ], clStridesOut[ dim ]) );
float scale_ = is_scaled_dft ? 1.f / _dft_size.area() : 1.f;
openCLSafeCall( clAmdFftSetPlanScale ( plHandle, is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale_ ) );
//ready to bake
openCLSafeCall( clAmdFftBakePlan( plHandle, 1, &(Context::getContext()->impl->clCmdQueue), NULL, NULL ) );
}
// Destroys the underlying clAmdFft plan via clAmdFftDestroyPlan.
// NOTE(review): the loop below erases entries from planStore while advancing the same
// index, so the element that moves into slot i after an erase is not examined. The loop
// may be a removed line of a rendered diff — confirm whether it is still part of the
// destructor.
cv::ocl::FftPlan::~FftPlan()
{
for(int i = 0; i < planStore.size(); i ++)
{
if(planStore[i]->plHandle == plHandle)
{
planStore.erase(planStore.begin() + i);
}
}
openCLSafeCall( clAmdFftDestroyPlan( &plHandle ) );
}
clAmdFftPlanHandle cv::ocl::FftPlan::getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type)
// Builds an empty cache: not started, no stored plans, no setup data.
cv::ocl::PlanCache::PlanCache()
    : started(false), planStore(), setupData(NULL)
{
}
// Releases the FFT resources by delegating to fft_teardown().
// NOTE(review): fft_teardown() reaches the cache through PlanCache::getPlanCache(); when
// this destructor runs from the static auto_ptr holder, that lookup happens while the
// instance is being destroyed — confirm this is safe at program exit.
cv::ocl::PlanCache::~PlanCache()
{
fft_teardown();
}
FftPlan* cv::ocl::PlanCache::getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type)
{
PlanCache& pCache = *PlanCache::getPlanCache();
vector<FftPlan *>& pStore = pCache.planStore;
// go through search
for(int i = 0; i < planStore.size(); i ++)
for(size_t i = 0; i < pStore.size(); i ++)
{
FftPlan *plan = planStore[i];
FftPlan *plan = pStore[i];
if(
plan->dft_size.width == _dft_size.width &&
plan->dft_size.height == _dft_size.height &&
@ -225,15 +255,31 @@ clAmdFftPlanHandle cv::ocl::FftPlan::getPlan(Size _dft_size, int _src_step, int
plan->src_step == _src_step &&
plan->dst_step == _dst_step &&
plan->type == _type
)
)
{
return plan->plHandle;
return plan;
}
}
// no baked plan is found
FftPlan *newPlan = new FftPlan(_dft_size, _src_step, _dst_step, _flags, _type);
planStore.push_back(newPlan);
return newPlan->plHandle;
pStore.push_back(newPlan);
return newPlan;
}
// Removes the plan identified by plHandle from the cache and destroys it.
//
// plHandle: handle of the plan to remove, as returned by FftPlan::getPlanHandle().
// Returns true if a stored plan with this handle was found, removed from the store
// and deleted; false if no stored plan has this handle.
bool cv::ocl::PlanCache::removePlan(clAmdFftPlanHandle plHandle)
{
    PlanCache& pCache = *PlanCache::getPlanCache();
    vector<FftPlan *>& pStore = pCache.planStore;
    for(size_t i = 0; i < pStore.size(); i ++)
    {
        if(pStore[i]->getPlanHandle() == plHandle)
        {
            // Save the pointer before erasing: after erase() slot i holds the next
            // plan (or is past the end), so deleting pStore[i] afterwards would destroy
            // the wrong plan and leak this one.
            FftPlan *plan = pStore[i];
            pStore.erase(pStore.begin() + i);
            delete plan;
            return true;
        }
    }
    return false;
}
void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
@ -245,19 +291,20 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
// check if the given dft size is of optimal dft size
CV_Assert(dft_size.area() == getOptimalDFTSize(dft_size.area()));
// the two flags are not compatible
CV_Assert( !((flags & DFT_SCALE) && (flags & DFT_ROWS)) );
// similar assertions with cuda module
CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2);
// we don't support DFT_SCALE flag
CV_Assert(!(DFT_SCALE & flags));
bool is_1d_input = (src.rows == 1);
int is_row_dft = flags & DFT_ROWS;
int is_scaled_dft = flags & DFT_SCALE;
//bool is_1d_input = (src.rows == 1);
//int is_row_dft = flags & DFT_ROWS;
//int is_scaled_dft = flags & DFT_SCALE;
int is_inverse = flags & DFT_INVERSE;
bool is_complex_input = src.channels() == 2;
bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
// We don't support real-to-real transform
CV_Assert(is_complex_input || is_complex_output);
FftType type = (FftType)(is_complex_input << 0 | is_complex_output << 1);
@ -268,12 +315,10 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
dst.create(src.rows, src.cols, CV_32FC2);
break;
case R2C:
CV_Assert(!is_row_dft); // this is not supported yet
dst.create(src.rows, src.cols / 2 + 1, CV_32FC2);
break;
case C2R:
CV_Assert(dft_size.width / 2 + 1 == src.cols && dft_size.height == src.rows);
CV_Assert(!is_row_dft); // this is not supported yet
dst.create(src.rows, dft_size.width, CV_32FC1);
break;
default:
@ -282,13 +327,14 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
throw exception();
break;
}
clAmdFftPlanHandle plHandle = FftPlan::getPlan(dft_size, src.step, dst.step, flags, type);
clAmdFftPlanHandle plHandle = PlanCache::getPlan(dft_size, src.step, dst.step, flags, type)->getPlanHandle();
//get the buffersize
size_t buffersize = 0;
openCLSafeCall( clAmdFftGetTmpBufSize(plHandle, &buffersize ) );
//allocate the intermediate buffer
// TODO, bind this with the current FftPlan
cl_mem clMedBuffer = NULL;
if (buffersize)
{
@ -297,17 +343,17 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
openCLSafeCall( medstatus );
}
openCLSafeCall( clAmdFftEnqueueTransform( plHandle,
is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
1,
&src.clCxt->impl->clCmdQueue,
0, NULL, NULL,
(cl_mem *)&src.data, (cl_mem *)&dst.data, clMedBuffer ) );
is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
1,
&src.clCxt->impl->clCmdQueue,
0, NULL, NULL,
(cl_mem *)&src.data, (cl_mem *)&dst.data, clMedBuffer ) );
openCLSafeCall( clFinish(src.clCxt->impl->clCmdQueue) );
if(clMedBuffer)
{
openCLFree(clMedBuffer);
}
//fft_teardown();
}
#endif
#endif //HAVE_CLAMDFFT

View File

@ -48,50 +48,59 @@ using namespace std;
#ifdef HAVE_CLAMDFFT
////////////////////////////////////////////////////////////////////////////
// Dft
// Parameterised fixture for the DFT tests: parameter 0 is the transform size and
// parameter 1 is stored in the second member (dft_rows / dft_flags) by SetUp().
// NOTE(review): two PARAM_TEST_CASE headers and two sets of members/assignments are
// interleaved (bool dft_rows vs. int dft_flags); this looks like before/after lines of a
// rendered diff — confirm which set is current.
PARAM_TEST_CASE(Dft, cv::Size, bool)
PARAM_TEST_CASE(Dft, cv::Size, int)
{
cv::Size dft_size;
bool dft_rows;
//std::vector<cv::ocl::Info> oclinfo;
int dft_flags;
// Copies the test parameters into the fixture members.
virtual void SetUp()
{
//int devnums = getDevice(oclinfo);
// CV_Assert(devnums > 0);
dft_size = GET_PARAM(0);
dft_rows = GET_PARAM(1);
dft_size = GET_PARAM(0);
dft_flags = GET_PARAM(1);
}
};
// Complex-to-complex DFT: runs cv::dft and cv::ocl::dft on the same random CV_32FC2
// matrix and expects the results to agree within a tolerance proportional to the
// number of elements.
// NOTE(review): two variants of the input range and of the flag handling are interleaved
// below (flags built from dft_rows vs. dft_flags passed directly); this looks like
// before/after lines of a rendered diff — confirm which set is current.
TEST_P(Dft, C2C)
{
cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0);
cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 100.0);
cv::Mat b_gold;
int flags = 0;
flags |= dft_rows ? cv::DFT_ROWS : 0;
cv::ocl::oclMat d_b;
cv::dft(a, b_gold, flags);
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
cv::dft(a, b_gold, dft_flags);
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), dft_flags);
EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), a.size().area() * 1e-4, "");
}
// Real-to-complex forward DFT: the OpenCL result is compared with the matching
// region of the CPU result computed with DFT_COMPLEX_OUTPUT, within a tolerance
// proportional to the number of input elements.
TEST_P(Dft, R2C)
{
    cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 100.0);
    cv::Mat b_gold;
    cv::ocl::oclMat d_b;

    cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), dft_flags);
    cv::dft(a, b_gold, cv::DFT_COMPLEX_OUTPUT | dft_flags);

    // The OpenCL output may be narrower than the full complex CPU result, so only
    // the region covered by d_b is compared.
    cv::Mat b_gold_roi = b_gold(cv::Rect(0, 0, d_b.cols, d_b.rows));
    EXPECT_MAT_NEAR(b_gold_roi, cv::Mat(d_b), a.size().area() * 1e-4, "");
}
// Round trip on the device: a forward transform of a random CV_32FC1 matrix followed by
// an inverse transform with real output; the result is expected to match the original
// input within a tolerance proportional to the number of elements.
// NOTE(review): two variants of the two cv::ocl::dft calls are interleaved below (one built
// from a local flags variable, one passing 0 and adding DFT_SCALE to the inverse); this
// looks like before/after lines of a rendered diff — confirm which set is current.
TEST_P(Dft, R2CthenC2R)
{
cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
int flags = 0;
//flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet
cv::ocl::oclMat d_b, d_c;
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT);
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), 0);
cv::ocl::dft(d_b, d_c, a.size(), cv::DFT_SCALE | cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT);
EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, "");
}
// Parameter grid for the Dft fixture: testing::Combine pairs every listed size with
// every listed value of the second parameter.
// NOTE(review): two instantiations (ocl_DFT with bool values, OCL_ImgProc with int flag
// values) are listed; this looks like the before/after pair of a rendered diff — confirm
// which one is current.
INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine(
testing::Values(cv::Size(5, 4), cv::Size(20, 20)),
testing::Values(false, true)));
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Dft, testing::Combine(
testing::Values(cv::Size(2, 3), cv::Size(5, 4), cv::Size(25, 20), cv::Size(512, 1), cv::Size(1024, 768)),
testing::Values(0, (int)cv::DFT_ROWS, (int)cv::DFT_SCALE) ));
#endif // HAVE_CLAMDFFT