Fix some bugs in the use of the AmdFft library
This commit is contained in:
parent
db9de43fa5
commit
4d6827212d
@ -45,29 +45,28 @@
|
||||
#include <iomanip>
|
||||
#include "precomp.hpp"
|
||||
|
||||
#ifdef HAVE_CLAMDFFT
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::ocl;
|
||||
using namespace std;
|
||||
|
||||
#if !defined HAVE_OPENCL
|
||||
void cv::ocl::dft(const oclMat &src, oclMat &dst, int flags)
|
||||
void cv::ocl::dft(const oclMat &, oclMat &, Size , int )
|
||||
{
|
||||
throw_nogpu();
|
||||
}
|
||||
#elif !defined HAVE_CLAMDFFT
|
||||
void cv::ocl::dft(const oclMat &src, oclMat &dst, int flags)
|
||||
void cv::ocl::dft(const oclMat&, oclMat&, Size, int)
|
||||
{
|
||||
CV_Error(CV_StsNotImplemented, "OpenCL DFT is not implemented");
|
||||
}
|
||||
#else
|
||||
#include <clAmdFft.h>
|
||||
|
||||
#include "clAmdFft.h"
|
||||
namespace cv
|
||||
{
|
||||
namespace ocl
|
||||
{
|
||||
void fft_setup();
|
||||
void fft_teardown();
|
||||
enum FftType
|
||||
{
|
||||
C2R = 1, // complex to real
|
||||
@ -76,73 +75,94 @@ namespace cv
|
||||
};
|
||||
struct FftPlan
|
||||
{
|
||||
friend void fft_setup();
|
||||
friend void fft_teardown();
|
||||
~FftPlan();
|
||||
protected:
|
||||
clAmdFftPlanHandle plHandle;
|
||||
FftPlan& operator=(const FftPlan&);
|
||||
public:
|
||||
FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
|
||||
~FftPlan();
|
||||
inline clAmdFftPlanHandle getPlanHandle() { return plHandle; }
|
||||
|
||||
const Size dft_size;
|
||||
const int src_step, dst_step;
|
||||
const int flags;
|
||||
const FftType type;
|
||||
clAmdFftPlanHandle plHandle;
|
||||
static vector<FftPlan *> planStore;
|
||||
static bool started;
|
||||
static clAmdFftSetupData *setupData;
|
||||
};
|
||||
class PlanCache
|
||||
{
|
||||
protected:
|
||||
PlanCache();
|
||||
~PlanCache();
|
||||
friend class auto_ptr<PlanCache>;
|
||||
static auto_ptr<PlanCache> planCache;
|
||||
|
||||
bool started;
|
||||
vector<FftPlan *> planStore;
|
||||
clAmdFftSetupData *setupData;
|
||||
public:
|
||||
friend void fft_setup();
|
||||
friend void fft_teardown();
|
||||
|
||||
static PlanCache* getPlanCache()
|
||||
{
|
||||
if( NULL == planCache.get())
|
||||
planCache.reset(new PlanCache());
|
||||
return planCache.get();
|
||||
}
|
||||
// return a baked plan:
|
||||
// if there is one matched plan, return it
|
||||
// if not, bake a new one, put it into the planStore and return it.
|
||||
static clAmdFftPlanHandle getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
|
||||
static FftPlan* getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
|
||||
|
||||
// remove a single plan from the store
|
||||
// return true if the plan is successfully removed
|
||||
// else return false
|
||||
static bool removePlan(clAmdFftPlanHandle );
|
||||
};
|
||||
}
|
||||
}
|
||||
bool cv::ocl::FftPlan::started = false;
|
||||
vector<cv::ocl::FftPlan *> cv::ocl::FftPlan::planStore = vector<cv::ocl::FftPlan *>();
|
||||
clAmdFftSetupData *cv::ocl::FftPlan::setupData = 0;
|
||||
auto_ptr<PlanCache> PlanCache::planCache;
|
||||
|
||||
void cv::ocl::fft_setup()
|
||||
{
|
||||
if(FftPlan::started)
|
||||
PlanCache& pCache = *PlanCache::getPlanCache();
|
||||
if(pCache.started)
|
||||
{
|
||||
return;
|
||||
}
|
||||
FftPlan::setupData = new clAmdFftSetupData;
|
||||
openCLSafeCall(clAmdFftInitSetupData( FftPlan::setupData ));
|
||||
FftPlan::started = true;
|
||||
pCache.setupData = new clAmdFftSetupData;
|
||||
openCLSafeCall(clAmdFftInitSetupData( pCache.setupData ));
|
||||
pCache.started = true;
|
||||
}
|
||||
void cv::ocl::fft_teardown()
|
||||
{
|
||||
if(!FftPlan::started)
|
||||
PlanCache& pCache = *PlanCache::getPlanCache();
|
||||
if(!pCache.started)
|
||||
{
|
||||
return;
|
||||
}
|
||||
delete FftPlan::setupData;
|
||||
for(int i = 0; i < FftPlan::planStore.size(); i ++)
|
||||
delete pCache.setupData;
|
||||
for(size_t i = 0; i < pCache.planStore.size(); i ++)
|
||||
{
|
||||
delete FftPlan::planStore[i];
|
||||
delete pCache.planStore[i];
|
||||
}
|
||||
FftPlan::planStore.clear();
|
||||
pCache.planStore.clear();
|
||||
openCLSafeCall( clAmdFftTeardown( ) );
|
||||
FftPlan::started = false;
|
||||
pCache.started = false;
|
||||
}
|
||||
|
||||
// bake a new plan
|
||||
cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type)
|
||||
: dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), flags(_flags), type(_type), plHandle(0)
|
||||
: plHandle(0), dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), flags(_flags), type(_type)
|
||||
{
|
||||
if(!FftPlan::started)
|
||||
{
|
||||
// implicitly do fft setup
|
||||
fft_setup();
|
||||
}
|
||||
fft_setup();
|
||||
|
||||
bool is_1d_input = (_dft_size.height == 1);
|
||||
int is_row_dft = flags & DFT_ROWS;
|
||||
int is_scaled_dft = flags & DFT_SCALE;
|
||||
int is_inverse = flags & DFT_INVERSE;
|
||||
int is_scaled_dft = flags & DFT_SCALE;
|
||||
int is_inverse = flags & DFT_INVERSE;
|
||||
|
||||
clAmdFftResultLocation place;
|
||||
//clAmdFftResultLocation place;
|
||||
clAmdFftLayout inLayout;
|
||||
clAmdFftLayout outLayout;
|
||||
clAmdFftDim dim = is_1d_input || is_row_dft ? CLFFT_1D : CLFFT_2D;
|
||||
@ -150,7 +170,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
|
||||
size_t batchSize = is_row_dft ? dft_size.height : 1;
|
||||
size_t clLengthsIn[ 3 ] = {1, 1, 1};
|
||||
size_t clStridesIn[ 3 ] = {1, 1, 1};
|
||||
size_t clLengthsOut[ 3 ] = {1, 1, 1};
|
||||
//size_t clLengthsOut[ 3 ] = {1, 1, 1};
|
||||
size_t clStridesOut[ 3 ] = {1, 1, 1};
|
||||
clLengthsIn[0] = dft_size.width;
|
||||
clLengthsIn[1] = is_row_dft ? 1 : dft_size.height;
|
||||
@ -166,14 +186,12 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
|
||||
clStridesOut[1] = clStridesIn[1];
|
||||
break;
|
||||
case R2C:
|
||||
CV_Assert(!is_row_dft); // this is not supported yet
|
||||
inLayout = CLFFT_REAL;
|
||||
outLayout = CLFFT_HERMITIAN_INTERLEAVED;
|
||||
clStridesIn[1] = src_step / sizeof(float);
|
||||
clStridesOut[1] = dst_step / sizeof(std::complex<float>);
|
||||
break;
|
||||
case C2R:
|
||||
CV_Assert(!is_row_dft); // this is not supported yet
|
||||
inLayout = CLFFT_HERMITIAN_INTERLEAVED;
|
||||
outLayout = CLFFT_REAL;
|
||||
clStridesIn[1] = src_step / sizeof(std::complex<float>);
|
||||
@ -197,27 +215,39 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
|
||||
|
||||
openCLSafeCall( clAmdFftSetPlanInStride ( plHandle, dim, clStridesIn ) );
|
||||
openCLSafeCall( clAmdFftSetPlanOutStride ( plHandle, dim, clStridesOut ) );
|
||||
openCLSafeCall( clAmdFftSetPlanDistance ( plHandle, clStridesIn[ dim ], clStridesIn[ dim ]) );
|
||||
openCLSafeCall( clAmdFftSetPlanDistance ( plHandle, clStridesIn[ dim ], clStridesOut[ dim ]) );
|
||||
|
||||
float scale_ = is_scaled_dft ? 1.f / _dft_size.area() : 1.f;
|
||||
openCLSafeCall( clAmdFftSetPlanScale ( plHandle, is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale_ ) );
|
||||
|
||||
//ready to bake
|
||||
openCLSafeCall( clAmdFftBakePlan( plHandle, 1, &(Context::getContext()->impl->clCmdQueue), NULL, NULL ) );
|
||||
}
|
||||
cv::ocl::FftPlan::~FftPlan()
|
||||
{
|
||||
for(int i = 0; i < planStore.size(); i ++)
|
||||
{
|
||||
if(planStore[i]->plHandle == plHandle)
|
||||
{
|
||||
planStore.erase(planStore.begin() + i);
|
||||
}
|
||||
}
|
||||
openCLSafeCall( clAmdFftDestroyPlan( &plHandle ) );
|
||||
}
|
||||
|
||||
clAmdFftPlanHandle cv::ocl::FftPlan::getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type)
|
||||
// Build an empty cache: not started, no stored plans, no setup data yet.
cv::ocl::PlanCache::PlanCache()
    : started(false), planStore(), setupData(NULL)
{
}
|
||||
|
||||
// Destroying the cache runs fft_teardown(), which releases the stored plans
// and the setup data and tears the FFT library down if it was started.
cv::ocl::PlanCache::~PlanCache()
{
    fft_teardown();
}
|
||||
|
||||
FftPlan* cv::ocl::PlanCache::getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type)
|
||||
{
|
||||
PlanCache& pCache = *PlanCache::getPlanCache();
|
||||
vector<FftPlan *>& pStore = pCache.planStore;
|
||||
// go through search
|
||||
for(int i = 0; i < planStore.size(); i ++)
|
||||
for(size_t i = 0; i < pStore.size(); i ++)
|
||||
{
|
||||
FftPlan *plan = planStore[i];
|
||||
FftPlan *plan = pStore[i];
|
||||
if(
|
||||
plan->dft_size.width == _dft_size.width &&
|
||||
plan->dft_size.height == _dft_size.height &&
|
||||
@ -225,15 +255,31 @@ clAmdFftPlanHandle cv::ocl::FftPlan::getPlan(Size _dft_size, int _src_step, int
|
||||
plan->src_step == _src_step &&
|
||||
plan->dst_step == _dst_step &&
|
||||
plan->type == _type
|
||||
)
|
||||
)
|
||||
{
|
||||
return plan->plHandle;
|
||||
return plan;
|
||||
}
|
||||
}
|
||||
// no baked plan is found
|
||||
FftPlan *newPlan = new FftPlan(_dft_size, _src_step, _dst_step, _flags, _type);
|
||||
planStore.push_back(newPlan);
|
||||
return newPlan->plHandle;
|
||||
pStore.push_back(newPlan);
|
||||
return newPlan;
|
||||
}
|
||||
|
||||
// Remove the plan whose handle equals plHandle from the cache and destroy it.
// Returns true if a matching plan was found and removed, false otherwise.
bool cv::ocl::PlanCache::removePlan(clAmdFftPlanHandle plHandle)
{
    PlanCache& pCache = *PlanCache::getPlanCache();
    vector<FftPlan *>& pStore = pCache.planStore;
    for(size_t i = 0; i < pStore.size(); i ++)
    {
        if(pStore[i]->getPlanHandle() == plHandle)
        {
            // Grab the pointer before erasing: once erase() has run, index i
            // refers to the following element (or is past the end), so
            // deleting pStore[i] afterwards would free the wrong plan and
            // leak the one that was actually removed.
            FftPlan *plan = pStore[i];
            pStore.erase(pStore.begin() + i);
            delete plan;
            return true;
        }
    }
    return false;
}
|
||||
|
||||
void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
|
||||
@ -245,19 +291,20 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
|
||||
// check if the given dft size is of optimal dft size
|
||||
CV_Assert(dft_size.area() == getOptimalDFTSize(dft_size.area()));
|
||||
|
||||
// the two flags are not compatible
|
||||
CV_Assert( !((flags & DFT_SCALE) && (flags & DFT_ROWS)) );
|
||||
|
||||
// similar assertions with cuda module
|
||||
CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2);
|
||||
|
||||
// we don't support DFT_SCALE flag
|
||||
CV_Assert(!(DFT_SCALE & flags));
|
||||
|
||||
bool is_1d_input = (src.rows == 1);
|
||||
int is_row_dft = flags & DFT_ROWS;
|
||||
int is_scaled_dft = flags & DFT_SCALE;
|
||||
//bool is_1d_input = (src.rows == 1);
|
||||
//int is_row_dft = flags & DFT_ROWS;
|
||||
//int is_scaled_dft = flags & DFT_SCALE;
|
||||
int is_inverse = flags & DFT_INVERSE;
|
||||
bool is_complex_input = src.channels() == 2;
|
||||
bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
|
||||
|
||||
|
||||
// We don't support real-to-real transform
|
||||
CV_Assert(is_complex_input || is_complex_output);
|
||||
FftType type = (FftType)(is_complex_input << 0 | is_complex_output << 1);
|
||||
@ -268,12 +315,10 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
|
||||
dst.create(src.rows, src.cols, CV_32FC2);
|
||||
break;
|
||||
case R2C:
|
||||
CV_Assert(!is_row_dft); // this is not supported yet
|
||||
dst.create(src.rows, src.cols / 2 + 1, CV_32FC2);
|
||||
break;
|
||||
case C2R:
|
||||
CV_Assert(dft_size.width / 2 + 1 == src.cols && dft_size.height == src.rows);
|
||||
CV_Assert(!is_row_dft); // this is not supported yet
|
||||
dst.create(src.rows, dft_size.width, CV_32FC1);
|
||||
break;
|
||||
default:
|
||||
@ -282,13 +327,14 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
|
||||
throw exception();
|
||||
break;
|
||||
}
|
||||
clAmdFftPlanHandle plHandle = FftPlan::getPlan(dft_size, src.step, dst.step, flags, type);
|
||||
clAmdFftPlanHandle plHandle = PlanCache::getPlan(dft_size, src.step, dst.step, flags, type)->getPlanHandle();
|
||||
|
||||
//get the buffersize
|
||||
size_t buffersize = 0;
|
||||
openCLSafeCall( clAmdFftGetTmpBufSize(plHandle, &buffersize ) );
|
||||
|
||||
//allocate the intermediate buffer
|
||||
// TODO, bind this with the current FftPlan
|
||||
cl_mem clMedBuffer = NULL;
|
||||
if (buffersize)
|
||||
{
|
||||
@ -297,17 +343,17 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
|
||||
openCLSafeCall( medstatus );
|
||||
}
|
||||
openCLSafeCall( clAmdFftEnqueueTransform( plHandle,
|
||||
is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
|
||||
1,
|
||||
&src.clCxt->impl->clCmdQueue,
|
||||
0, NULL, NULL,
|
||||
(cl_mem *)&src.data, (cl_mem *)&dst.data, clMedBuffer ) );
|
||||
is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
|
||||
1,
|
||||
&src.clCxt->impl->clCmdQueue,
|
||||
0, NULL, NULL,
|
||||
(cl_mem *)&src.data, (cl_mem *)&dst.data, clMedBuffer ) );
|
||||
openCLSafeCall( clFinish(src.clCxt->impl->clCmdQueue) );
|
||||
if(clMedBuffer)
|
||||
{
|
||||
openCLFree(clMedBuffer);
|
||||
}
|
||||
//fft_teardown();
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif //HAVE_CLAMDFFT
|
||||
|
@ -48,50 +48,59 @@ using namespace std;
|
||||
#ifdef HAVE_CLAMDFFT
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Dft
|
||||
PARAM_TEST_CASE(Dft, cv::Size, bool)
|
||||
PARAM_TEST_CASE(Dft, cv::Size, int)
|
||||
{
|
||||
cv::Size dft_size;
|
||||
bool dft_rows;
|
||||
//std::vector<cv::ocl::Info> oclinfo;
|
||||
int dft_flags;
|
||||
virtual void SetUp()
|
||||
{
|
||||
//int devnums = getDevice(oclinfo);
|
||||
// CV_Assert(devnums > 0);
|
||||
dft_size = GET_PARAM(0);
|
||||
dft_rows = GET_PARAM(1);
|
||||
dft_size = GET_PARAM(0);
|
||||
dft_flags = GET_PARAM(1);
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(Dft, C2C)
|
||||
{
|
||||
cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0);
|
||||
cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 100.0);
|
||||
cv::Mat b_gold;
|
||||
int flags = 0;
|
||||
flags |= dft_rows ? cv::DFT_ROWS : 0;
|
||||
|
||||
cv::ocl::oclMat d_b;
|
||||
|
||||
cv::dft(a, b_gold, flags);
|
||||
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
|
||||
cv::dft(a, b_gold, dft_flags);
|
||||
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), dft_flags);
|
||||
EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), a.size().area() * 1e-4, "");
|
||||
}
|
||||
|
||||
// Forward real-to-complex DFT on the OpenCL path versus the CPU reference.
// The OpenCL output d_b can be narrower than the full complex spectrum that
// cv::dft produces with DFT_COMPLEX_OUTPUT, so only the region of b_gold that
// matches d_b's size is compared.
// The original body also computed an inverse CPU transform into an unused
// matrix and repeated the same comparison; that dead code is dropped here.
// The inverse path is exercised by the R2CthenC2R test.
TEST_P(Dft, R2C)
{
    cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 100.0);
    cv::Mat b_gold, b_gold_roi;

    cv::ocl::oclMat d_b;
    cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), dft_flags);
    cv::dft(a, b_gold, cv::DFT_COMPLEX_OUTPUT | dft_flags);

    b_gold_roi = b_gold(cv::Rect(0, 0, d_b.cols, d_b.rows));
    EXPECT_MAT_NEAR(b_gold_roi, cv::Mat(d_b), a.size().area() * 1e-4, "");
}
|
||||
|
||||
TEST_P(Dft, R2CthenC2R)
|
||||
{
|
||||
cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
|
||||
|
||||
int flags = 0;
|
||||
//flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet
|
||||
|
||||
cv::ocl::oclMat d_b, d_c;
|
||||
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
|
||||
cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT);
|
||||
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), 0);
|
||||
cv::ocl::dft(d_b, d_c, a.size(), cv::DFT_SCALE | cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT);
|
||||
EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, "");
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine(
|
||||
testing::Values(cv::Size(5, 4), cv::Size(20, 20)),
|
||||
testing::Values(false, true)));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Dft, testing::Combine(
|
||||
testing::Values(cv::Size(2, 3), cv::Size(5, 4), cv::Size(25, 20), cv::Size(512, 1), cv::Size(1024, 768)),
|
||||
testing::Values(0, (int)cv::DFT_ROWS, (int)cv::DFT_SCALE) ));
|
||||
|
||||
#endif // HAVE_CLAMDFFT
|
||||
|
Loading…
Reference in New Issue
Block a user