diff --git a/CMakeLists.txt b/CMakeLists.txt
index 62709b805..e048156a6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -140,6 +140,9 @@ OCV_OPTION(WITH_XIMEA "Include XIMEA cameras support" OFF
 OCV_OPTION(WITH_XINE "Include Xine support (GPL)" OFF IF (UNIX AND NOT APPLE AND NOT ANDROID) )
 OCV_OPTION(WITH_CLP "Include Clp support (EPL)" OFF)
 OCV_OPTION(WITH_OPENCL "Include OpenCL Runtime support" OFF IF (NOT ANDROID AND NOT IOS) )
+OCV_OPTION(WITH_OPENCLAMDFFT "Include AMD OpenCL FFT library support" OFF IF (NOT ANDROID AND NOT IOS) )
+OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" OFF IF (NOT ANDROID AND NOT IOS) )
+
 
 # OpenCV build components
 # ===================================================
@@ -396,6 +399,12 @@ if(WITH_OPENCL)
   if(OPENCL_FOUND)
     set(HAVE_OPENCL 1)
   endif()
+  if(WITH_OPENCLAMDFFT AND CLAMDFFT_INCLUDE_DIR AND CLAMDFFT_LIBRARIES)
+    set(HAVE_CLAMDFFT 1)
+  endif()
+  if(WITH_OPENCLAMDBLAS AND CLAMDBLAS_INCLUDE_DIR AND CLAMDBLAS_LIBRARIES)
+    set(HAVE_CLAMDBLAS 1)
+  endif()
 endif()
 
 # ----------------------------------------------------------------------------
diff --git a/cmake/OpenCVDetectOpenCL.cmake b/cmake/OpenCVDetectOpenCL.cmake
index 903b55b65..96473a500 100644
--- a/cmake/OpenCVDetectOpenCL.cmake
+++ b/cmake/OpenCVDetectOpenCL.cmake
@@ -2,8 +2,19 @@ if(APPLE)
     set(OPENCL_FOUND YES)
     set(OPENCL_LIBRARIES "-framework OpenCL")
 else()
-    find_package(OpenCL QUIET)
-
+    #find_package(OpenCL QUIET)
+    if(WITH_OPENCLAMDFFT)
+        find_path(CLAMDFFT_INCLUDE_DIR
+            NAMES clAmdFft.h)
+        find_library(CLAMDFFT_LIBRARIES
+            NAMES clAmdFft.Runtime)
+    endif()
+    if(WITH_OPENCLAMDBLAS)
+        find_path(CLAMDBLAS_INCLUDE_DIR
+            NAMES clAmdBlas.h)
+        find_library(CLAMDBLAS_LIBRARIES
+            NAMES clAmdBlas)
+    endif()
     # Try AMD/ATI Stream SDK
     if (NOT OPENCL_FOUND)
         set(ENV_AMDSTREAMSDKROOT $ENV{AMDAPPSDKROOT})
diff --git a/cmake/templates/cvconfig.h.cmake b/cmake/templates/cvconfig.h.cmake
index 101200805..fb779c887 100644
--- a/cmake/templates/cvconfig.h.cmake
+++ b/cmake/templates/cvconfig.h.cmake
@@ -175,6 +175,12 @@
 /* OpenCL Support */
 #cmakedefine HAVE_OPENCL
 
+/* AMD's OpenCL Fast Fourier Transform Library*/
+#cmakedefine HAVE_CLAMDFFT
+
+/* AMD's Basic Linear Algebra Subprograms Library*/
+#cmakedefine HAVE_CLAMDBLAS
+
 /* NVidia Cuda Fast Fourier Transform (FFT) API*/
 #cmakedefine HAVE_CUFFT
 
diff --git a/modules/ocl/CMakeLists.txt b/modules/ocl/CMakeLists.txt
index b331e9f1e..24595770d 100644
--- a/modules/ocl/CMakeLists.txt
+++ b/modules/ocl/CMakeLists.txt
@@ -29,6 +29,14 @@ if (HAVE_OPENCL)
   if(OPENCL_INCLUDE_DIR)
     ocv_include_directories(${OPENCL_INCLUDE_DIR})
   endif()
+  if (HAVE_CLAMDFFT)
+    set(ocl_link_libs ${ocl_link_libs} ${CLAMDFFT_LIBRARIES})
+    ocv_include_directories(${CLAMDFFT_INCLUDE_DIR})
+  endif()
+  if (HAVE_CLAMDBLAS)
+    set(ocl_link_libs ${ocl_link_libs} ${CLAMDBLAS_LIBRARIES})
+    ocv_include_directories(${CLAMDBLAS_INCLUDE_DIR})
+  endif()
 endif()
 
 ocv_set_module_sources(
diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp
index 517e9ffce..3bc6729e4 100644
--- a/modules/ocl/include/opencv2/ocl/ocl.hpp
+++ b/modules/ocl/include/opencv2/ocl/ocl.hpp
@@ -894,7 +894,35 @@ namespace cv
         // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
         CV_EXPORTS void matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method, MatchTemplateBuf& buf);
 
+#ifdef HAVE_CLAMDFFT
+        ///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
+        // These two functions bracket every use of the clAmdFft library: setup first, teardown last.
+        CV_EXPORTS void fft_setup();    // invoked implicitly when the first FFT plan is created
+        CV_EXPORTS void fft_teardown(); // must be called manually to release the FFT library resources
+        /////////////////////////////////////// DFT /////////////////////////////////////////////////////
+        //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of a floating-point matrix.
+        //! Param dft_size is the size of the DFT; Size(0, 0) (the default) means src.size().
+        //!
+        //! For a complex-to-real transform the source is assumed to be packed in clAmdFft's Hermitian-interleaved layout.
+        // supported src types: CV_32FC1, CV_32FC2
+        // supported flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS (DFT_SCALE is rejected)
+        // dft_size is the size of the original real input; a complex-to-real transform needs it to size dst.
+        // dft_size.area() must be an optimal DFT size (a product of powers of 2, 3 and 5)
+        // real to complex dft requires at least v1.8 clAmdFft
+        // real-to-complex output differs from the CPU version: dst has src.cols / 2 + 1 complex columns
+        // real-to-complex and complex-to-real transforms do not support DFT_ROWS
+        CV_EXPORTS void dft(const oclMat& src, oclMat& dst, Size dft_size = Size(0, 0), int flags = 0);
+#endif // HAVE_CLAMDFFT
+
+#ifdef HAVE_CLAMDBLAS
+        //! implements generalized matrix product algorithm GEMM from BLAS
+        // The functionality requires clAmdBlas library
+        // supported types: CV_32FC1, CV_64FC1, CV_32FC2, CV_64FC2
+        // flag GEMM_3_T is not supported
+        CV_EXPORTS void gemm(const oclMat& src1, const oclMat& src2, double alpha,
+                             const oclMat& src3, double beta, oclMat& dst, int flags = 0);
+#endif
     }
 }
diff --git a/modules/ocl/src/fft.cpp b/modules/ocl/src/fft.cpp
new file mode 100644
index 000000000..b3eda35c1
--- /dev/null
+++ b/modules/ocl/src/fft.cpp
@@ -0,0 +1,302 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#include <iomanip>
+#include <complex>
+#include "precomp.hpp"
+
+#ifdef HAVE_CLAMDFFT
+
+using namespace cv;
+using namespace cv::ocl;
+using namespace std;
+
+#if !defined (HAVE_OPENCL)
+// Stubs for builds without OpenCL. The signatures must match the declarations in ocl.hpp,
+// otherwise these out-of-namespace definitions do not compile.
+void cv::ocl::fft_setup() { throw_nogpu(); }
+void cv::ocl::fft_teardown() { throw_nogpu(); }
+void cv::ocl::dft(const oclMat&, oclMat&, Size, int) { throw_nogpu(); }
+#else
+
+#include <clAmdFft.h>
+
+namespace cv{ namespace ocl {
+    // Kind of transform. The values form a bit mask that dft() computes directly:
+    // bit 0 is set when the input is complex, bit 1 is set when the output is complex.
+    enum FftType
+    {
+        C2R = 1, // opencl HERMITIAN_INTERLEAVED to real
+        R2C = 2, // real to opencl HERMITIAN_INTERLEAVED
+        C2C = 3  // complex to complex
+    };
+    // A baked clAmdFft plan together with the parameters it was baked for.
+    // Plans are created on demand by getPlan() and kept in planStore until fft_teardown().
+    struct FftPlan
+    {
+        friend void fft_setup();
+        friend void fft_teardown();
+        ~FftPlan();
+    protected:
+        FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
+        const Size dft_size;
+        const int src_step, dst_step;
+        const int flags;
+        const FftType type;
+        clAmdFftPlanHandle plHandle;
+        static vector<FftPlan*> planStore;
+        static bool started;
+        static clAmdFftSetupData * setupData;
+    public:
+        // return a baked plan:
+        // if there is one matched plan, return it
+        // if not, bake a new one, put it into the planStore and return it.
+        static clAmdFftPlanHandle getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
+    };
+}}
+bool cv::ocl::FftPlan::started = false;
+vector<FftPlan*> cv::ocl::FftPlan::planStore = vector<FftPlan*>();
+clAmdFftSetupData * cv::ocl::FftPlan::setupData = 0;
+
+// Initializes the clAmdFft setup data once; later calls are no-ops until fft_teardown().
+void cv::ocl::fft_setup()
+{
+    if(FftPlan::started)
+    {
+        return;
+    }
+    FftPlan::setupData = new clAmdFftSetupData;
+    openCLSafeCall(clAmdFftInitSetupData( FftPlan::setupData ));
+    FftPlan::started = true;
+}
+
+// Destroys every cached plan and shuts the clAmdFft library down.
+void cv::ocl::fft_teardown()
+{
+    if(!FftPlan::started)
+    {
+        return;
+    }
+    // ~FftPlan() removes the plan from planStore, so the store must not be iterated
+    // while plans are being deleted (that skipped, and leaked, every second plan).
+    // Detach the cached plans first and delete them from the private copy.
+    vector<FftPlan*> plans;
+    plans.swap(FftPlan::planStore);
+    for(size_t i = 0; i < plans.size(); i ++)
+    {
+        delete plans[i];
+    }
+    delete FftPlan::setupData;
+    FftPlan::setupData = 0; // do not leave a dangling pointer behind
+    openCLSafeCall( clAmdFftTeardown( ) );
+    FftPlan::started = false;
+}
+
+// bake a new plan
+cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type)
+    : dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), flags(_flags), type(_type), plHandle(0)
+{
+    if(!FftPlan::started)
+    {
+        // implicitly do fft setup
+        fft_setup();
+    }
+
+    const bool is_1d_input = (_dft_size.height == 1);
+    const int is_row_dft = flags & DFT_ROWS;
+
+    clAmdFftLayout inLayout = CLFFT_COMPLEX_INTERLEAVED;
+    clAmdFftLayout outLayout = CLFFT_COMPLEX_INTERLEAVED;
+    // a single row, or rows transformed independently, are handled as (batched) 1D transforms
+    const clAmdFftDim dim = is_1d_input || is_row_dft ? CLFFT_1D : CLFFT_2D;
+
+    // with DFT_ROWS every row is one transform of the batch
+    const size_t batchSize = is_row_dft ? dft_size.height : 1;
+    size_t clLengthsIn[ 3 ] = {1, 1, 1};
+    size_t clStridesIn[ 3 ] = {1, 1, 1};
+    size_t clStridesOut[ 3 ] = {1, 1, 1};
+    clLengthsIn[0] = dft_size.width;
+    clLengthsIn[1] = is_row_dft ? 1 : dft_size.height;
+
+    // steps are given in bytes, clAmdFft strides are counted in elements
+    switch(_type)
+    {
+    case C2C:
+        inLayout = CLFFT_COMPLEX_INTERLEAVED;
+        outLayout = CLFFT_COMPLEX_INTERLEAVED;
+        clStridesIn[1] = src_step / sizeof(std::complex<float>);
+        // the output has its own row step; it only happens to equal the input step
+        // when src and dst were allocated alike
+        clStridesOut[1] = dst_step / sizeof(std::complex<float>);
+        break;
+    case R2C:
+        CV_Assert(!is_row_dft); // this is not supported yet
+        inLayout = CLFFT_REAL;
+        outLayout = CLFFT_HERMITIAN_INTERLEAVED;
+        clStridesIn[1] = src_step / sizeof(float);
+        clStridesOut[1] = dst_step / sizeof(std::complex<float>);
+        break;
+    case C2R:
+        CV_Assert(!is_row_dft); // this is not supported yet
+        inLayout = CLFFT_HERMITIAN_INTERLEAVED;
+        outLayout = CLFFT_REAL;
+        clStridesIn[1] = src_step / sizeof(std::complex<float>);
+        clStridesOut[1] = dst_step / sizeof(float);
+        break;
+    default:
+        CV_Error(CV_StsNotImplemented, "Does not support this convertion!");
+        break;
+    }
+
+    clStridesIn[2] = is_row_dft ? clStridesIn[1] : dft_size.width * clStridesIn[1];
+    clStridesOut[2] = is_row_dft ? clStridesOut[1] : dft_size.width * clStridesOut[1];
+
+    openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, Context::getContext()->impl->clContext, dim, clLengthsIn ) );
+
+    openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) );
+    openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) );
+    openCLSafeCall( clAmdFftSetPlanBatchSize( plHandle, batchSize ) );
+
+    openCLSafeCall( clAmdFftSetPlanInStride ( plHandle, dim, clStridesIn ) );
+    openCLSafeCall( clAmdFftSetPlanOutStride ( plHandle, dim, clStridesOut ) );
+    // the second distance describes the output buffer, so it must come from the output strides
+    openCLSafeCall( clAmdFftSetPlanDistance ( plHandle, clStridesIn[ dim ], clStridesOut[ dim ]) );
+    openCLSafeCall( clAmdFftBakePlan( plHandle, 1, &(Context::getContext()->impl->clCmdQueue), NULL, NULL ) );
+}
+
+// Removes the plan from the cache (if it is still there) and destroys the clAmdFft plan.
+cv::ocl::FftPlan::~FftPlan()
+{
+    for(size_t i = 0; i < planStore.size(); i ++)
+    {
+        if(planStore[i] == this)
+        {
+            planStore.erase(planStore.begin() + i);
+            break; // a plan is stored at most once; do not keep walking the shifted vector
+        }
+    }
+    openCLSafeCall( clAmdFftDestroyPlan( &plHandle ) );
+}
+
+// Returns the handle of a cached plan baked for exactly these parameters, baking and caching
+// a new plan when none matches.
+clAmdFftPlanHandle cv::ocl::FftPlan::getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type)
+{
+    // go through search
+    for(size_t i = 0; i < planStore.size(); i ++)
+    {
+        const FftPlan * plan = planStore[i];
+        if(
+            plan->dft_size.width == _dft_size.width &&
+            plan->dft_size.height == _dft_size.height &&
+            plan->flags == _flags &&
+            plan->src_step == _src_step &&
+            plan->dst_step == _dst_step &&
+            plan->type == _type
+        )
+        {
+            return plan->plHandle;
+        }
+    }
+    // no baked plan is found
+    FftPlan *newPlan = new FftPlan(_dft_size, _src_step, _dst_step, _flags, _type);
+    planStore.push_back(newPlan);
+    return newPlan->plHandle;
+}
+
+// Runs a forward or inverse DFT of src into dst with clAmdFft.
+//   src      - CV_32FC1 (real) or CV_32FC2 (complex) input
+//   dst      - (re)allocated here; its size and type depend on the transform kind
+//   dft_size - size of the transform; Size(0, 0) means src.size()
+//   flags    - DFT_INVERSE, DFT_REAL_OUTPUT, DFT_ROWS; DFT_SCALE is rejected
+// Blocks until the transform has finished.
+void cv::ocl::dft(const oclMat& src, oclMat& dst, Size dft_size, int flags)
+{
+    if(dft_size == Size(0,0))
+    {
+        dft_size = src.size();
+    }
+    // check if the given dft size is of optimal dft size
+    CV_Assert(dft_size.area() == getOptimalDFTSize(dft_size.area()));
+
+    // similar assertions with cuda module
+    CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2);
+
+    // we don't support DFT_SCALE flag
+    CV_Assert(!(DFT_SCALE & flags));
+
+    const int is_row_dft = flags & DFT_ROWS;
+    const int is_inverse = flags & DFT_INVERSE;
+    const bool is_complex_input = src.channels() == 2;
+    const bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
+
+    // We don't support real-to-real transform
+    CV_Assert(is_complex_input || is_complex_output);
+    // bit 0: complex input, bit 1: complex output (see FftType)
+    const FftType type = (FftType)(is_complex_input << 0 | is_complex_output << 1);
+
+    switch(type)
+    {
+    case C2C:
+        dst.create(src.rows, src.cols, CV_32FC2);
+        break;
+    case R2C:
+        CV_Assert(!is_row_dft); // this is not supported yet
+        dst.create(src.rows, src.cols/2 + 1, CV_32FC2);
+        break;
+    case C2R:
+        CV_Assert(dft_size.width / 2 + 1 == src.cols && dft_size.height == src.rows);
+        CV_Assert(!is_row_dft); // this is not supported yet
+        dst.create(src.rows, dft_size.width, CV_32FC1);
+        break;
+    default:
+        CV_Error(CV_StsNotImplemented, "Does not support this convertion!");
+        break;
+    }
+    clAmdFftPlanHandle plHandle = FftPlan::getPlan(dft_size, src.step, dst.step, flags, type);
+
+    //get the buffersize
+    size_t buffersize=0;
+    openCLSafeCall( clAmdFftGetTmpBufSize(plHandle, &buffersize ) );
+
+    //allocate the intermediate buffer
+    cl_mem clMedBuffer=NULL;
+    if (buffersize)
+    {
+        cl_int medstatus;
+        clMedBuffer = clCreateBuffer ( src.clCxt->impl->clContext, CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
+        openCLSafeCall( medstatus );
+    }
+    // Keep the status instead of throwing right away, so that the intermediate buffer
+    // is released on the failure path as well.
+    cl_int status = clAmdFftEnqueueTransform( plHandle,
+        is_inverse?CLFFT_BACKWARD:CLFFT_FORWARD,
+        1,
+        &src.clCxt->impl->clCmdQueue,
+        0, NULL, NULL,
+        (cl_mem*)&src.data, (cl_mem*)&dst.data, clMedBuffer );
+    if(status == CL_SUCCESS)
+    {
+        // the intermediate buffer must stay alive until the transform has completed
+        status = clFinish(src.clCxt->impl->clCmdQueue);
+    }
+    if(clMedBuffer)
+    {
+        openCLFree(clMedBuffer);
+    }
+    openCLSafeCall( status );
+}
+
+#endif
+#endif //HAVE_CLAMDFFT
diff --git a/modules/ocl/src/gemm.cpp b/modules/ocl/src/gemm.cpp
new file mode 100644
index 000000000..c35e06182
--- /dev/null
+++ b/modules/ocl/src/gemm.cpp
@@ -0,0 +1,161 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include <iomanip>
+#include "precomp.hpp"
+
+#ifdef HAVE_CLAMDBLAS
+
+#include "clAmdBlas.h"
+
+#if !defined (HAVE_OPENCL)
+// Stub for builds without OpenCL; the signature must match the declaration in ocl.hpp.
+void cv::ocl::gemm(const oclMat&, const oclMat&, double,
+                   const oclMat&, double, oclMat&, int) { throw_nogpu(); }
+#else
+
+using namespace cv;
+
+// Generalized matrix product computed with clAmdBlas:
+//     dst = alpha * op(src1) * op(src2) + beta * src3
+// where op() transposes its argument when GEMM_1_T / GEMM_2_T is set in flags.
+//   src1, src2 - operands of the same type: CV_32FC1, CV_64FC1, CV_32FC2 or CV_64FC2
+//   src3       - optional addend of the result size; an empty matrix is treated as zeros
+//   dst        - (re)allocated here
+//   flags      - combination of GEMM_1_T and GEMM_2_T; GEMM_3_T is not supported
+void cv::ocl::gemm(const oclMat& src1, const oclMat& src2, double alpha,
+                   const oclMat& src3, double beta, oclMat& dst, int flags)
+{
+    CV_Assert(!(cv::GEMM_3_T & flags)); // cv::GEMM_3_T is not supported
+
+    const bool is_src1_transposed = (cv::GEMM_1_T & flags) != 0;
+    const bool is_src2_transposed = (cv::GEMM_2_T & flags) != 0;
+
+    // BLAS describes the product by the shapes of op(src1) (M x K) and op(src2) (K x N),
+    // so the transposition flags have to be taken into account here.
+    const int M = is_src1_transposed ? src1.cols : src1.rows;
+    const int K = is_src1_transposed ? src1.rows : src1.cols;
+    const int N = is_src2_transposed ? src2.rows : src2.cols;
+    const int src2_K = is_src2_transposed ? src2.cols : src2.rows;
+
+    CV_Assert(K == src2_K &&
+              (src3.empty() || (src3.rows == M && src3.cols == N)));
+
+    // reject unsupported types up front instead of silently returning src3 (or zeros)
+    const int type = src1.type();
+    CV_Assert(type == CV_32FC1 || type == CV_64FC1 || type == CV_32FC2 || type == CV_64FC2);
+    CV_Assert(src2.type() == type && (src3.empty() || src3.type() == type));
+
+    // clAmdBlas accumulates into C, so dst starts out as src3 (or as zeros)
+    if(!src3.empty())
+    {
+        src3.copyTo(dst);
+    }
+    else
+    {
+        dst.create(M, N, type);
+        dst.setTo(Scalar::all(0));
+    }
+    openCLSafeCall( clAmdBlasSetup() );
+
+    const clAmdBlasTranspose transA = is_src1_transposed ? clAmdBlasTrans : clAmdBlasNoTrans;
+    const clAmdBlasTranspose transB = is_src2_transposed ? clAmdBlasTrans : clAmdBlasNoTrans;
+    const clAmdBlasOrder order = clAmdBlasRowMajor;
+
+    // steps and offsets are stored in bytes, clAmdBlas expects them in elements
+    const int esz = CV_ELEM_SIZE(type);
+    const int lda = (int)(src1.step / esz);
+    const int ldb = (int)(src2.step / esz);
+    const int ldc = (int)(dst.step / esz);
+    const int offa = (int)(src1.offset / esz);
+    const int offb = (int)(src2.offset / esz);
+    const int offc = (int)(dst.offset / esz);
+
+    // Keep the status instead of throwing right away, so that clAmdBlasTeardown()
+    // is reached on the failure path as well.
+    cl_int status = CL_SUCCESS;
+    switch(type)
+    {
+    case CV_32FC1:
+        status = clAmdBlasSgemmEx(order, transA, transB, M, N, K,
+                                  (cl_float)alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
+                                  (cl_float)beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL);
+        break;
+    case CV_64FC1:
+        status = clAmdBlasDgemmEx(order, transA, transB, M, N, K,
+                                  alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
+                                  beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL);
+        break;
+    case CV_32FC2:
+    {
+        // explicit casts: brace initialization must not narrow double to float
+        cl_float2 alpha_2 = {{(cl_float)alpha, 0}};
+        cl_float2 beta_2 = {{(cl_float)beta, 0}};
+        status = clAmdBlasCgemmEx(order, transA, transB, M, N, K,
+                                  alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
+                                  beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL);
+    }
+    break;
+    case CV_64FC2:
+    {
+        cl_double2 alpha_2 = {{alpha, 0}};
+        cl_double2 beta_2 = {{beta, 0}};
+        status = clAmdBlasZgemmEx(order, transA, transB, M, N, K,
+                                  alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
+                                  beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL);
+    }
+    break;
+    }
+    clAmdBlasTeardown();
+    openCLSafeCall( status );
+}
+#endif
+#endif
diff --git a/modules/ocl/test/test_fft.cpp b/modules/ocl/test/test_fft.cpp
new file mode 100644
index 000000000..4b51d4fec
--- /dev/null
+++ b/modules/ocl/test/test_fft.cpp
@@ -0,0 +1,97 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+using namespace std;
+#ifdef HAVE_CLAMDFFT
+////////////////////////////////////////////////////////////////////////////
+// Dft
+// Parameters: the size of the transformed matrix and whether DFT_ROWS is requested.
+PARAM_TEST_CASE(Dft, cv::Size, bool)
+{
+    cv::Size dft_size;
+    bool dft_rows;
+    std::vector<cv::ocl::Info> oclinfo;
+    virtual void SetUp()
+    {
+        // the tests cannot run without at least one OpenCL device
+        int devnums = cv::ocl::getDevice(oclinfo);
+        CV_Assert(devnums > 0);
+        dft_size = GET_PARAM(0);
+        dft_rows = GET_PARAM(1);
+    }
+};
+
+// Complex-to-complex forward transform compared against the CPU implementation.
+TEST_P(Dft, C2C)
+{
+    cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0);
+    cv::Mat b_gold;
+    int flags = 0;
+    flags |= dft_rows ? cv::DFT_ROWS : 0;
+
+    cv::ocl::oclMat d_b;
+
+    cv::dft(a, b_gold, flags);
+    cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
+    EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), a.size().area() * 1e-4, "");
+}
+
+
+// Real-to-complex followed by complex-to-real must reproduce the input.
+// The packed R2C output differs from the CPU layout, so only the round trip is checked.
+TEST_P(Dft, R2CthenC2R)
+{
+    cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
+
+    int flags = 0;
+    //flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet
+
+    cv::ocl::oclMat d_b, d_c;
+    cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
+    cv::ocl::dft(d_b, d_c, a.size(), flags | cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT);
+    EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, "");
+}
+
+INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine(
+    testing::Values(cv::Size(5, 4), cv::Size(20, 20)),
+    testing::Values(false, true)));
+
+#endif // HAVE_CLAMDFFT
diff --git a/modules/ocl/test/test_gemm.cpp b/modules/ocl/test/test_gemm.cpp
new file mode 100644
index 000000000..a836149cb
--- /dev/null
+++ b/modules/ocl/test/test_gemm.cpp
@@ -0,0 +1,85 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@multicorewareinc.com
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+
+#include "precomp.hpp"
+using namespace std;
+#ifdef HAVE_CLAMDBLAS
+////////////////////////////////////////////////////////////////////////////
+// GEMM
+// Parameters: the matrix type, the (square) matrix size and the GEMM transposition flags.
+PARAM_TEST_CASE(Gemm, int, cv::Size, int)
+{
+    int type;
+    cv::Size mat_size;
+    int flags;
+    vector<cv::ocl::Info> info;
+    virtual void SetUp()
+    {
+        type = GET_PARAM(0);
+        mat_size = GET_PARAM(1);
+        flags = GET_PARAM(2);
+        cv::ocl::getDevice(info);
+    }
+};
+
+// Compares cv::ocl::gemm against the CPU cv::gemm for dst = a * b + c.
+TEST_P(Gemm, Accuracy)
+{
+    cv::Mat a = randomMat(mat_size, type, 0.0, 10.0);
+    cv::Mat b = randomMat(mat_size, type, 0.0, 10.0);
+    cv::Mat c = randomMat(mat_size, type, 0.0, 10.0);
+
+    cv::Mat dst;
+    cv::ocl::oclMat ocl_dst;
+
+    cv::gemm(a, b, 1.0, c, 1.0, dst, flags);
+    cv::ocl::gemm(cv::ocl::oclMat(a), cv::ocl::oclMat(b), 1.0, cv::ocl::oclMat(c), 1.0, ocl_dst, flags);
+
+    EXPECT_MAT_NEAR(dst, ocl_dst, mat_size.area() * 1e-4, "");
+}
+
+// All operands share one square size, so every transposition combination is shape-compatible.
+INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
+    testing::Values(CV_32FC1, CV_32FC2/*, CV_64FC1, CV_64FC2*/),
+    testing::Values(cv::Size(20, 20), cv::Size(300, 300)),
+    testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T | cv::GEMM_2_T)));
+#endif