diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp index cb72c25d5..ebe3d7807 100644 --- a/modules/core/include/opencv2/core/ocl.hpp +++ b/modules/core/include/opencv2/core/ocl.hpp @@ -49,6 +49,7 @@ namespace cv { namespace ocl { CV_EXPORTS bool haveOpenCL(); CV_EXPORTS bool useOpenCL(); CV_EXPORTS bool haveAmdBlas(); +CV_EXPORTS bool haveAmdFft(); CV_EXPORTS void setUseOpenCL(bool flag); CV_EXPORTS void finish2(); diff --git a/modules/core/include/opencv2/core/opencl/runtime/autogenerated/opencl_clamdfft.hpp b/modules/core/include/opencv2/core/opencl/runtime/autogenerated/opencl_clamdfft.hpp index 6f6f1bcdd..b370f0e36 100644 --- a/modules/core/include/opencv2/core/opencl/runtime/autogenerated/opencl_clamdfft.hpp +++ b/modules/core/include/opencv2/core/opencl/runtime/autogenerated/opencl_clamdfft.hpp @@ -95,7 +95,7 @@ #undef clAmdFftSetPlanOutStride #define clAmdFftSetPlanOutStride clAmdFftSetPlanOutStride_pfn #undef clAmdFftSetPlanPrecision -//#define clAmdFftSetPlanPrecision clAmdFftSetPlanPrecision_pfn +#define clAmdFftSetPlanPrecision clAmdFftSetPlanPrecision_pfn #undef clAmdFftSetPlanScale #define clAmdFftSetPlanScale clAmdFftSetPlanScale_pfn #undef clAmdFftSetPlanTransposeResult @@ -134,7 +134,7 @@ extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanDistance)(clAmdFftPlanH extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanInStride)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides); //extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanLength)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, const size_t* clLengths); extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanOutStride)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides); -//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanPrecision)(clAmdFftPlanHandle plHandle, clAmdFftPrecision precision); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanPrecision)(clAmdFftPlanHandle plHandle, clAmdFftPrecision precision); extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanScale)(clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float scale); //extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanTransposeResult)(clAmdFftPlanHandle plHandle, clAmdFftResultTransposed transposed); extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetResultLocation)(clAmdFftPlanHandle plHandle, clAmdFftResultLocation placeness); diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index e6fed4eae..f168478af 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -40,6 +40,8 @@ //M*/ #include "precomp.hpp" +#include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp" +#include "opencv2/core/opencl/runtime/opencl_core.hpp" namespace cv { @@ -1473,8 +1475,261 @@ typedef IppStatus (CV_STDCALL* IppDFTGetSizeFunc)(int, int, IppHintAlgorithm, in typedef IppStatus (CV_STDCALL* IppDFTInitFunc)(int, int, IppHintAlgorithm, void*, uchar*); #endif +#ifdef HAVE_CLAMDFFT + +namespace cv { + +#define CLAMDDFT_Assert(func) \ + { \ + clAmdFftStatus s = (func); \ + CV_Assert(s == CLFFT_SUCCESS); \ + } + +enum FftType +{ + R2R = 0, // real to real + C2R = 1, // opencl HERMITIAN_INTERLEAVED to real + R2C = 2, // real to opencl HERMITIAN_INTERLEAVED + C2C = 3 // complex to complex +}; + +class PlanCache +{ + struct FftPlan + { + FftPlan(const Size & _dft_size, int _src_step, int _dst_step, bool _doubleFP, bool _inplace, int _flags, FftType _fftType) : + dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), + doubleFP(_doubleFP), inplace(_inplace), flags(_flags), fftType(_fftType), plHandle(0) + { + bool dft_inverse = (flags & DFT_INVERSE) != 0; + bool dft_scale = (flags & DFT_SCALE) != 0; + bool dft_rows = (flags & DFT_ROWS) != 0; + + clAmdFftLayout inLayout = CLFFT_REAL, outLayout = CLFFT_REAL; + clAmdFftDim dim = dft_size.height == 1 || dft_rows ? CLFFT_1D : CLFFT_2D; + + size_t batchSize = dft_rows ? dft_size.height : 1; + size_t clLengthsIn[3] = { dft_size.width, dft_rows ? 1 : dft_size.height, 1 }; + size_t clStridesIn[3] = { 1, 1, 1 }; + size_t clStridesOut[3] = { 1, 1, 1 }; + int elemSize = doubleFP ? sizeof(double) : sizeof(float); + + switch (fftType) + { + case C2C: + inLayout = CLFFT_COMPLEX_INTERLEAVED; + outLayout = CLFFT_COMPLEX_INTERLEAVED; + clStridesIn[1] = src_step / (elemSize << 1); + clStridesOut[1] = dst_step / (elemSize << 1); + break; + case R2C: + inLayout = CLFFT_REAL; + outLayout = CLFFT_HERMITIAN_INTERLEAVED; + clStridesIn[1] = src_step / elemSize; + clStridesOut[1] = dst_step / (elemSize << 1); + break; + case C2R: + inLayout = CLFFT_HERMITIAN_INTERLEAVED; + outLayout = CLFFT_REAL; + clStridesIn[1] = src_step / (elemSize << 1); + clStridesOut[1] = dst_step / elemSize; + break; + case R2R: + default: + CV_Error(Error::StsNotImplemented, "AMD Fft does not support this type"); + break; + } + + clStridesIn[2] = dft_rows ? clStridesIn[1] : dft_size.width * clStridesIn[1]; + clStridesOut[2] = dft_rows ? clStridesOut[1] : dft_size.width * clStridesOut[1]; + + // TODO remove all plans if context changed + CLAMDDFT_Assert(clAmdFftCreateDefaultPlan(&plHandle, (cl_context)ocl::Context2::getDefault().ptr(), dim, clLengthsIn)) + + // setting plan properties + CLAMDDFT_Assert(clAmdFftSetPlanPrecision(plHandle, doubleFP ? CLFFT_DOUBLE : CLFFT_SINGLE)); + CLAMDDFT_Assert(clAmdFftSetResultLocation(plHandle, inplace ? CLFFT_INPLACE : CLFFT_OUTOFPLACE)) + CLAMDDFT_Assert(clAmdFftSetLayout(plHandle, inLayout, outLayout)) + CLAMDDFT_Assert(clAmdFftSetPlanBatchSize(plHandle, batchSize)) + CLAMDDFT_Assert(clAmdFftSetPlanInStride(plHandle, dim, clStridesIn)) + CLAMDDFT_Assert(clAmdFftSetPlanOutStride(plHandle, dim, clStridesOut)) + CLAMDDFT_Assert(clAmdFftSetPlanDistance(plHandle, clStridesIn[dim], clStridesOut[dim])) + + float scale = dft_scale ? 1.0f / (dft_rows ? dft_size.width : dft_size.area()) : 1.0f; + CLAMDDFT_Assert(clAmdFftSetPlanScale(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale)) + + // ready to bake + cl_command_queue commandQueue = (cl_command_queue)ocl::Queue::getDefault().ptr(); + CLAMDDFT_Assert(clAmdFftBakePlan(plHandle, 1, &commandQueue, NULL, NULL)) + } + + ~FftPlan() + { +// clAmdFftDestroyPlan(&plHandle); + } + + friend class PlanCache; + + private: + Size dft_size; + int src_step, dst_step; + bool doubleFP; + bool inplace; + int flags; + FftType fftType; + + cl_context context; + clAmdFftPlanHandle plHandle; + }; + +public: + static PlanCache & getInstance() + { + static PlanCache planCache; + return planCache; + } + + clAmdFftPlanHandle getPlanHandle(const Size & dft_size, int src_step, int dst_step, bool doubleFP, + bool inplace, int flags, FftType fftType) + { + cl_context currentContext = (cl_context)ocl::Context2::getDefault().ptr(); + + for (size_t i = 0, size = planStorage.size(); i < size; i ++) + { + const FftPlan * const plan = planStorage[i]; + if (plan->dft_size == dft_size && + plan->flags == flags && + plan->src_step == src_step && + plan->dst_step == dst_step && + plan->doubleFP == doubleFP && + plan->fftType == fftType && + plan->inplace == inplace) + { + if (plan->context != currentContext) + { + planStorage.erase(planStorage.begin() + i); + break; + } + + return plan->plHandle; + } + } + + // no baked plan is found, so let's create a new one + FftPlan * newPlan = new FftPlan(dft_size, src_step, dst_step, doubleFP, inplace, flags, fftType); + planStorage.push_back(newPlan); + + return newPlan->plHandle; + } + + ~PlanCache() + { + for (std::vector::iterator i = planStorage.begin(), end = planStorage.end(); i != end; ++i) + delete (*i); + planStorage.clear(); + } + +protected: + PlanCache() : + planStorage() + { + } + + std::vector planStorage; +}; + +extern "C" { + +static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p) +{ + UMatData * u = (UMatData *)p; + + if( u && CV_XADD(&u->urefcount, -1) == 1 ) + u->currAllocator->deallocate(u); + u = 0; + + clReleaseEvent(e), e = 0; +} + +} + +static bool ocl_dft(InputArray _src, OutputArray _dst, int flags) +{ + int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + Size ssize = _src.size(); + + bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + if ( (!doubleSupport && depth == CV_64F) || + !(type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2) || + _src.offset() != 0) + return false; + + // if is not a multiplication of prime numbers { 2, 3, 5 } + if (ssize.area() != getOptimalDFTSize(ssize.area())) + return false; + + int dst_complex_input = cn == 2 ? 1 : 0; + bool dft_inverse = (flags & DFT_INVERSE) != 0 ? 1 : 0; + int dft_complex_output = (flags & DFT_COMPLEX_OUTPUT) != 0; + bool dft_real_output = (flags & DFT_REAL_OUTPUT) != 0; + + CV_Assert(dft_complex_output + dft_real_output < 2); + FftType fftType = (FftType)(dst_complex_input << 0 | dft_complex_output << 1); + + switch (fftType) + { + case C2C: + _dst.create(ssize.height, ssize.width, CV_MAKE_TYPE(depth, 2)); + break; + case R2C: // TODO implement it if possible + case C2R: // TODO implement it if possible + case R2R: // AMD Fft does not support this type + default: + return false; + } + + UMat src = _src.getUMat(), dst = _dst.getUMat(); + bool inplace = src.u == dst.u; + + clAmdFftPlanHandle plHandle = PlanCache::getInstance(). + getPlanHandle(ssize, (int)src.step, (int)dst.step, + depth == CV_64F, inplace, flags, fftType); + + // get the bufferSize + size_t bufferSize = 0; + CLAMDDFT_Assert(clAmdFftGetTmpBufSize(plHandle, &bufferSize)) + UMat tmpBuffer(1, (int)bufferSize, CV_8UC1); + + cl_mem srcarg = (cl_mem)src.handle(ACCESS_READ); + cl_mem dstarg = (cl_mem)dst.handle(ACCESS_RW); + + cl_command_queue commandQueue = (cl_command_queue)ocl::Queue::getDefault().ptr(); + cl_event e = 0; + + CLAMDDFT_Assert(clAmdFftEnqueueTransform(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, + 1, &commandQueue, 0, NULL, &e, + &srcarg, &dstarg, (cl_mem)tmpBuffer.handle(ACCESS_RW))) + + tmpBuffer.addref(); + clSetEventCallback(e, CL_COMPLETE, oclCleanupCallback, tmpBuffer.u); + + return true; +} + +#undef DFT_ASSERT + +} + +#endif // HAVE_CLAMDFFT + void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) { +#ifdef HAVE_CLAMDFFT + if (ocl::useOpenCL() && ocl::haveAmdFft() && _dst.isUMat() && _src0.dims() <= 2 + && nonzero_rows == 0 && ocl_dft(_src0, _dst, flags)) + return; +#endif + static DFTFunc dft_tbl[6] = { (DFTFunc)DFT_32f, diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index b763334eb..b1d864832 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -43,6 +43,7 @@ #include #include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp" +#include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp" #ifdef HAVE_OPENCL #include "opencv2/core/opencl/runtime/opencl_core.hpp" @@ -1423,6 +1424,83 @@ bool haveAmdBlas() #endif +#ifdef HAVE_CLAMDFFT + +class AmdFftHelper +{ +public: + static AmdFftHelper & getInstance() + { + static AmdFftHelper amdFft; + return amdFft; + } + + bool isAvailable() const + { + return g_isAmdFftAvailable; + } + + ~AmdFftHelper() + { + try + { +// clAmdFftTeardown(); + } + catch (...) { } + } + +protected: + AmdFftHelper() + { + if (!g_isAmdFftInitialized) + { + AutoLock lock(m); + + if (!g_isAmdFftInitialized && haveOpenCL()) + { + try + { + CV_Assert(clAmdFftInitSetupData(&setupData) == CLFFT_SUCCESS); + g_isAmdFftAvailable = true; + } + catch (const Exception &) + { + g_isAmdFftAvailable = false; + } + } + else + g_isAmdFftAvailable = false; + + g_isAmdFftInitialized = true; + } + } + +private: + static clAmdFftSetupData setupData; + static Mutex m; + static bool g_isAmdFftInitialized; + static bool g_isAmdFftAvailable; +}; + +clAmdFftSetupData AmdFftHelper::setupData; +bool AmdFftHelper::g_isAmdFftAvailable = false; +bool AmdFftHelper::g_isAmdFftInitialized = false; +Mutex AmdFftHelper::m; + +bool haveAmdFft() +{ + return AmdFftHelper::getInstance().isAvailable(); +} + +#else + +bool haveAmdFft() +{ + return false; +} + +#endif + void finish2() { Queue::getDefault().finish(); diff --git a/modules/core/src/opencl/runtime/autogenerated/opencl_clamdfft_impl.hpp b/modules/core/src/opencl/runtime/autogenerated/opencl_clamdfft_impl.hpp index d431d5d0e..1742ab606 100644 --- a/modules/core/src/opencl/runtime/autogenerated/opencl_clamdfft_impl.hpp +++ b/modules/core/src/opencl/runtime/autogenerated/opencl_clamdfft_impl.hpp @@ -33,7 +33,7 @@ enum OPENCLAMDFFT_FN_ID { OPENCLAMDFFT_FN_clAmdFftSetPlanInStride = 23, // OPENCLAMDFFT_FN_clAmdFftSetPlanLength = 24, OPENCLAMDFFT_FN_clAmdFftSetPlanOutStride = 25, -// OPENCLAMDFFT_FN_clAmdFftSetPlanPrecision = 26, + OPENCLAMDFFT_FN_clAmdFftSetPlanPrecision = 26, OPENCLAMDFFT_FN_clAmdFftSetPlanScale = 27, // OPENCLAMDFFT_FN_clAmdFftSetPlanTransposeResult = 28, OPENCLAMDFFT_FN_clAmdFftSetResultLocation = 29, @@ -334,9 +334,9 @@ clAmdFftStatus (*clAmdFftSetPlanOutStride)(clAmdFftPlanHandle, const clAmdFftDim openclamdfft_fn3::switch_fn; static const struct DynamicFnEntry clAmdFftSetPlanOutStride_definition = { "clAmdFftSetPlanOutStride", (void**)&clAmdFftSetPlanOutStride}; -//clAmdFftStatus (*clAmdFftSetPlanPrecision)(clAmdFftPlanHandle, clAmdFftPrecision) = -// openclamdfft_fn2::switch_fn; -//static const struct DynamicFnEntry clAmdFftSetPlanPrecision_definition = { "clAmdFftSetPlanPrecision", (void**)&clAmdFftSetPlanPrecision}; +clAmdFftStatus (*clAmdFftSetPlanPrecision)(clAmdFftPlanHandle, clAmdFftPrecision) = + openclamdfft_fn2::switch_fn; +static const struct DynamicFnEntry clAmdFftSetPlanPrecision_definition = { "clAmdFftSetPlanPrecision", (void**)&clAmdFftSetPlanPrecision}; clAmdFftStatus (*clAmdFftSetPlanScale)(clAmdFftPlanHandle, clAmdFftDirection, cl_float) = openclamdfft_fn3::switch_fn; @@ -387,7 +387,7 @@ static const struct DynamicFnEntry* openclamdfft_fn[] = { &clAmdFftSetPlanInStride_definition, NULL/*&clAmdFftSetPlanLength_definition*/, &clAmdFftSetPlanOutStride_definition, - NULL/*&clAmdFftSetPlanPrecision_definition*/, + &clAmdFftSetPlanPrecision_definition, &clAmdFftSetPlanScale_definition, NULL/*&clAmdFftSetPlanTransposeResult_definition*/, &clAmdFftSetResultLocation_definition, @@ -396,4 +396,4 @@ static const struct DynamicFnEntry* openclamdfft_fn[] = { ADDITIONAL_FN_DEFINITIONS // macro for custom functions }; -// number of enabled functions: 14 +// number of enabled functions: 15 diff --git a/modules/core/src/opencl/runtime/generator/filter/opencl_clamdfft_functions.list b/modules/core/src/opencl/runtime/generator/filter/opencl_clamdfft_functions.list index 7f407ff02..1f9820a1c 100644 --- a/modules/core/src/opencl/runtime/generator/filter/opencl_clamdfft_functions.list +++ b/modules/core/src/opencl/runtime/generator/filter/opencl_clamdfft_functions.list @@ -5,7 +5,7 @@ clAmdFftDestroyPlan clAmdFftEnqueueTransform //clAmdFftGetLayout //clAmdFftGetPlanBatchSize -//clAmdFftGetPlanContext +clAmdFftGetPlanContext //clAmdFftGetPlanDim //clAmdFftGetPlanDistance //clAmdFftGetPlanInStride @@ -22,9 +22,9 @@ clAmdFftSetPlanBatchSize //clAmdFftSetPlanDim clAmdFftSetPlanDistance clAmdFftSetPlanInStride -//clAmdFftSetPlanLength +clAmdFftSetPlanLength clAmdFftSetPlanOutStride -//clAmdFftSetPlanPrecision +clAmdFftSetPlanPrecision clAmdFftSetPlanScale //clAmdFftSetPlanTransposeResult clAmdFftSetResultLocation diff --git a/modules/core/test/ocl/test_dft.cpp b/modules/core/test/ocl/test_dft.cpp new file mode 100644 index 000000000..5a596f382 --- /dev/null +++ b/modules/core/test/ocl/test_dft.cpp @@ -0,0 +1,113 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "test_precomp.hpp" +#include "opencv2/ts/ocl_test.hpp" + +#ifdef HAVE_OPENCL + +namespace cvtest { +namespace ocl { + +//////////////////////////////////////////////////////////////////////////// +// Dft + +PARAM_TEST_CASE(Dft, cv::Size, MatDepth, bool, bool, bool, bool) +{ + cv::Size dft_size; + int dft_flags, depth; + bool inplace; + + TEST_DECLARE_INPUT_PARAMETER(src) + TEST_DECLARE_OUTPUT_PARAMETER(dst) + + virtual void SetUp() + { + dft_size = GET_PARAM(0); + depth = GET_PARAM(1); + inplace = GET_PARAM(2); + + dft_flags = 0; + if (GET_PARAM(3)) + dft_flags |= cv::DFT_ROWS; + if (GET_PARAM(4)) + dft_flags |= cv::DFT_SCALE; + if (GET_PARAM(5)) + dft_flags |= cv::DFT_INVERSE; + } + + void generateTestData(int cn = 2) + { + src = randomMat(dft_size, CV_MAKE_TYPE(depth, cn), 0.0, 100.0); + usrc = src.getUMat(ACCESS_READ); + + if (inplace) + dst = src, udst = usrc; + } +}; + +OCL_TEST_P(Dft, C2C) +{ + generateTestData(); + + OCL_OFF(cv::dft(src, dst, dft_flags | cv::DFT_COMPLEX_OUTPUT)); + OCL_ON(cv::dft(usrc, udst, dft_flags | cv::DFT_COMPLEX_OUTPUT)); + + double eps = src.size().area() * 1e-4; + EXPECT_MAT_NEAR(dst, udst, eps); +} + +OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(2, 3), cv::Size(5, 4), cv::Size(25, 20), + cv::Size(512, 1), cv::Size(1024, 768)), + Values(CV_32F, CV_64F), + Bool(), // inplace + Bool(), // DFT_ROWS + Bool(), // DFT_SCALE + Bool()) // DFT_INVERSE + ); + +} } // namespace cvtest::ocl + +#endif // HAVE_OPENCL diff --git a/modules/ocl/src/fft.cpp b/modules/ocl/src/fft.cpp index 70711bb46..395f14fba 100644 --- a/modules/ocl/src/fft.cpp +++ b/modules/ocl/src/fft.cpp @@ -48,43 +48,53 @@ using namespace cv; using namespace cv::ocl; #if !defined HAVE_CLAMDFFT + void cv::ocl::dft(const oclMat&, oclMat&, Size, int) { CV_Error(Error::OpenCLNoAMDBlasFft, "OpenCL DFT is not implemented"); } + namespace cv { namespace ocl { void fft_teardown(); -}} -void cv::ocl::fft_teardown(){} +} } + +void cv::ocl::fft_teardown() { } + #else + #include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp" + namespace cv { namespace ocl { void fft_setup(); void fft_teardown(); + enum FftType { C2R = 1, // complex to complex R2C = 2, // real to opencl HERMITIAN_INTERLEAVED C2C = 3 // opencl HERMITIAN_INTERLEAVED to real }; + struct FftPlan { protected: clAmdFftPlanHandle plHandle; FftPlan& operator=(const FftPlan&); public: - FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type); + FftPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type); ~FftPlan(); inline clAmdFftPlanHandle getPlanHandle() { return plHandle; } const Size dft_size; const int src_step, dst_step; + const int depth; const int flags; const FftType type; }; + class PlanCache { protected: @@ -105,10 +115,11 @@ namespace cv planCache = new PlanCache(); return planCache; } + // return a baked plan-> // if there is one matched plan, return it // if not, bake a new one, put it into the planStore and return it. - static FftPlan* getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type); + static FftPlan* getPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type); // remove a single plan from the store // return true if the plan is successfully removed @@ -117,6 +128,7 @@ namespace cv }; } } + PlanCache* PlanCache::planCache = NULL; void cv::ocl::fft_setup() @@ -128,9 +140,11 @@ void cv::ocl::fft_setup() } if (pCache.setupData == NULL) pCache.setupData = new clAmdFftSetupData; + openCLSafeCall(clAmdFftInitSetupData( pCache.setupData )); pCache.started = true; } + void cv::ocl::fft_teardown() { PlanCache& pCache = *PlanCache::getPlanCache(); @@ -154,8 +168,8 @@ void cv::ocl::fft_teardown() } // bake a new plan -cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type) - : plHandle(0), dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), flags(_flags), type(_type) +cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type) + : plHandle(0), dft_size(_dft_size), src_step(_src_step), depth(_depth), dst_step(_dst_step), flags(_flags), type(_type) { fft_setup(); @@ -184,20 +198,20 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla case C2C: inLayout = CLFFT_COMPLEX_INTERLEAVED; outLayout = CLFFT_COMPLEX_INTERLEAVED; - clStridesIn[1] = src_step / (2*sizeof(float)); - clStridesOut[1] = clStridesIn[1]; + clStridesIn[1] = src_step / (2*CV_ELEM_SIZE(_depth)); + clStridesOut[1] = dst_step / (2*CV_ELEM_SIZE(_depth)); break; case R2C: inLayout = CLFFT_REAL; outLayout = CLFFT_HERMITIAN_INTERLEAVED; - clStridesIn[1] = src_step / sizeof(float); - clStridesOut[1] = dst_step / (2*sizeof(float)); + clStridesIn[1] = src_step / CV_ELEM_SIZE(_depth); + clStridesOut[1] = dst_step / (2*CV_ELEM_SIZE(_depth)); break; case C2R: inLayout = CLFFT_HERMITIAN_INTERLEAVED; outLayout = CLFFT_REAL; - clStridesIn[1] = src_step / (2*sizeof(float)); - clStridesOut[1] = dst_step / sizeof(float); + clStridesIn[1] = src_step / (2*CV_ELEM_SIZE(_depth)); + clStridesOut[1] = dst_step / CV_ELEM_SIZE(_depth); break; default: //std::runtime_error("does not support this convertion!"); @@ -211,6 +225,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, *(cl_context*)getClContextPtr(), dim, clLengthsIn ) ); + openCLSafeCall( clAmdFftSetPlanPrecision( plHandle, depth == CV_64F ? CLFFT_DOUBLE : CLFFT_SINGLE ) ); openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) ); openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) ); openCLSafeCall( clAmdFftSetPlanBatchSize( plHandle, batchSize ) ); @@ -225,6 +240,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla //ready to bake openCLSafeCall( clAmdFftBakePlan( plHandle, 1, (cl_command_queue*)getClCommandQueuePtr(), NULL, NULL ) ); } + cv::ocl::FftPlan::~FftPlan() { openCLSafeCall( clAmdFftDestroyPlan( &plHandle ) ); @@ -242,7 +258,7 @@ cv::ocl::PlanCache::~PlanCache() fft_teardown(); } -FftPlan* cv::ocl::PlanCache::getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type) +FftPlan* cv::ocl::PlanCache::getPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type) { PlanCache& pCache = *PlanCache::getPlanCache(); std::vector& pStore = pCache.planStore; @@ -256,6 +272,7 @@ FftPlan* cv::ocl::PlanCache::getPlan(Size _dft_size, int _src_step, int _dst_ste plan->flags == _flags && plan->src_step == _src_step && plan->dst_step == _dst_step && + plan->depth == _depth && plan->type == _type ) { @@ -263,7 +280,7 @@ FftPlan* cv::ocl::PlanCache::getPlan(Size _dft_size, int _src_step, int _dst_ste } } // no baked plan is found - FftPlan *newPlan = new FftPlan(_dft_size, _src_step, _dst_step, _flags, _type); + FftPlan *newPlan = new FftPlan(_dft_size, _src_step, _dst_step, _depth, _flags, _type); pStore.push_back(newPlan); return newPlan; } @@ -286,6 +303,8 @@ bool cv::ocl::PlanCache::removePlan(clAmdFftPlanHandle plHandle) void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags) { + CV_Assert(cv::ocl::haveAmdFft()); + if(dft_size == Size(0, 0)) { dft_size = src.size(); @@ -296,9 +315,6 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags) // the two flags are not compatible CV_Assert( !((flags & DFT_SCALE) && (flags & DFT_ROWS)) ); - // similar assertions with cuda module - CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2); - //bool is_1d_input = (src.rows == 1); //int is_row_dft = flags & DFT_ROWS; //int is_scaled_dft = flags & DFT_SCALE; @@ -306,6 +322,7 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags) bool is_complex_input = src.channels() == 2; bool is_complex_output = !(flags & DFT_REAL_OUTPUT); + int depth = src.depth(); // We don't support real-to-real transform CV_Assert(is_complex_input || is_complex_output); @@ -314,14 +331,17 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags) switch(type) { case C2C: - dst.create(src.rows, src.cols, CV_32FC2); + dst.create(src.rows, src.cols, CV_MAKE_TYPE(depth, 2)); + printf("C2C\n"); break; case R2C: - dst.create(src.rows, src.cols / 2 + 1, CV_32FC2); + dst.create(src.rows, src.cols / 2 + 1, CV_MAKE_TYPE(depth, 2)); + printf("R2C\n"); break; case C2R: CV_Assert(dft_size.width / 2 + 1 == src.cols && dft_size.height == src.rows); - dst.create(src.rows, dft_size.width, CV_32FC1); + dst.create(src.rows, dft_size.width, CV_MAKE_TYPE(depth, 1)); + printf("C2R\n"); break; default: //std::runtime_error("does not support this convertion!"); @@ -329,7 +349,7 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags) throw std::exception(); break; } - clAmdFftPlanHandle plHandle = PlanCache::getPlan(dft_size, src.step, dst.step, flags, type)->getPlanHandle(); + clAmdFftPlanHandle plHandle = PlanCache::getPlan(dft_size, src.step, dst.step, depth, flags, type)->getPlanHandle(); //get the buffersize size_t buffersize = 0; @@ -356,7 +376,7 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags) { openCLFree(clMedBuffer); } - //fft_teardown(); + fft_teardown(); } #endif diff --git a/modules/ocl/src/precomp.hpp b/modules/ocl/src/precomp.hpp index 25347ffde..9cdb07aae 100644 --- a/modules/ocl/src/precomp.hpp +++ b/modules/ocl/src/precomp.hpp @@ -75,6 +75,7 @@ #include "opencv2/core/utility.hpp" #include "opencv2/core/private.hpp" +#include "opencv2/core/ocl.hpp" #define __ATI__ diff --git a/modules/ocl/test/test_fft.cpp b/modules/ocl/test/test_fft.cpp index 20d88c21f..1c2a1da47 100644 --- a/modules/ocl/test/test_fft.cpp +++ b/modules/ocl/test/test_fft.cpp @@ -50,32 +50,36 @@ using namespace std; //////////////////////////////////////////////////////////////////////////// // Dft -PARAM_TEST_CASE(Dft, cv::Size, int) +PARAM_TEST_CASE(Dft, cv::Size, int, bool) { cv::Size dft_size; int dft_flags; + bool doubleFP; + virtual void SetUp() { dft_size = GET_PARAM(0); dft_flags = GET_PARAM(1); + doubleFP = GET_PARAM(2); } }; OCL_TEST_P(Dft, C2C) { - cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 100.0); + cv::Mat a = randomMat(dft_size, doubleFP ? CV_64FC2 : CV_32FC2, 0.0, 100.0); cv::Mat b_gold; cv::ocl::oclMat d_b; cv::dft(a, b_gold, dft_flags); cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), dft_flags); + EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), a.size().area() * 1e-4); } OCL_TEST_P(Dft, R2C) { - cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 100.0); + cv::Mat a = randomMat(dft_size, doubleFP ? CV_64FC1 : CV_32FC1, 0.0, 100.0); cv::Mat b_gold, b_gold_roi; cv::ocl::oclMat d_b, d_c; @@ -92,7 +96,7 @@ OCL_TEST_P(Dft, R2C) OCL_TEST_P(Dft, R2CthenC2R) { - cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0); + cv::Mat a = randomMat(dft_size, doubleFP ? CV_64FC1 : CV_32FC1, 0.0, 10.0); cv::ocl::oclMat d_b, d_c; cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), 0); @@ -102,7 +106,7 @@ OCL_TEST_P(Dft, R2CthenC2R) INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Dft, testing::Combine( testing::Values(cv::Size(2, 3), cv::Size(5, 4), cv::Size(25, 20), cv::Size(512, 1), cv::Size(1024, 768)), - testing::Values(0, (int)cv::DFT_ROWS, (int)cv::DFT_SCALE) )); + testing::Values(0, (int)cv::DFT_ROWS, (int)cv::DFT_SCALE), testing::Bool())); //////////////////////////////////////////////////////////////////////////// // MulSpectrums