Add dft and gemm to ocl module, using AMD's clAmdFft and clAmdBlas libraries

This commit is contained in:
yao 2012-08-07 16:33:46 +08:00
parent 7741d585f5
commit c03ac12fcd
9 changed files with 709 additions and 2 deletions

View File

@ -140,6 +140,9 @@ OCV_OPTION(WITH_XIMEA "Include XIMEA cameras support" OFF
OCV_OPTION(WITH_XINE "Include Xine support (GPL)" OFF IF (UNIX AND NOT APPLE AND NOT ANDROID) )
OCV_OPTION(WITH_CLP "Include Clp support (EPL)" OFF)
OCV_OPTION(WITH_OPENCL "Include OpenCL Runtime support" OFF IF (NOT ANDROID AND NOT IOS) )
# Optional AMD OpenCL math libraries (clAmdFft / clAmdBlas). Both are OFF by
# default and carry the same "NOT ANDROID AND NOT IOS" condition as WITH_OPENCL.
OCV_OPTION(WITH_OPENCLAMDFFT "Include AMD OpenCL FFT library support" OFF IF (NOT ANDROID AND NOT IOS) )
OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" OFF IF (NOT ANDROID AND NOT IOS) )
# OpenCV build components
# ===================================================
@ -396,6 +399,12 @@ if(WITH_OPENCL)
if(OPENCL_FOUND)
set(HAVE_OPENCL 1)
endif()
# Advertise the AMD FFT/BLAS back-ends only when the option is enabled AND the
# corresponding header was actually located. Setting HAVE_* from the option
# alone makes the ocl module compile against a clAmdFft.h / clAmdBlas.h that
# may not exist (a failed find_path leaves "<VAR>-NOTFOUND", which if() treats
# as false).
# NOTE(review): assumes the OpenCL detection script that runs find_path() for
# these variables has already been processed at this point -- confirm.
if(WITH_OPENCLAMDFFT AND CLAMDFFT_INCLUDE_DIR)
  set(HAVE_CLAMDFFT 1)
endif()
if(WITH_OPENCLAMDBLAS AND CLAMDBLAS_INCLUDE_DIR)
  set(HAVE_CLAMDBLAS 1)
endif()
endif()
# ----------------------------------------------------------------------------

View File

@ -2,8 +2,19 @@ if(APPLE)
set(OPENCL_FOUND YES)
set(OPENCL_LIBRARIES "-framework OpenCL")
else()
find_package(OpenCL QUIET)
#find_package(OpenCL QUIET)
if(WITH_OPENCLAMDFFT)
  # Locate the clAmdFft public header and its runtime library.
  find_path(CLAMDFFT_INCLUDE_DIR NAMES clAmdFft.h)
  find_library(CLAMDFFT_LIBRARIES NAMES clAmdFft.Runtime)
endif()
if(WITH_OPENCLAMDBLAS)
  # Locate the clAmdBlas public header and its library.
  find_path(CLAMDBLAS_INCLUDE_DIR NAMES clAmdBlas.h)
  find_library(CLAMDBLAS_LIBRARIES NAMES clAmdBlas)
endif()
# Try AMD/ATI Stream SDK
if (NOT OPENCL_FOUND)
set(ENV_AMDSTREAMSDKROOT $ENV{AMDAPPSDKROOT})

View File

@ -175,6 +175,12 @@
/* OpenCL Support */
#cmakedefine HAVE_OPENCL
/* AMD's OpenCL Fast Fourier Transform library (clAmdFft) */
#cmakedefine HAVE_CLAMDFFT
/* AMD's OpenCL Basic Linear Algebra Subprograms library (clAmdBlas) */
#cmakedefine HAVE_CLAMDBLAS
/* NVidia Cuda Fast Fourier Transform (FFT) API*/
#cmakedefine HAVE_CUFFT

View File

@ -29,6 +29,14 @@ if (HAVE_OPENCL)
if(OPENCL_INCLUDE_DIR)
ocv_include_directories(${OPENCL_INCLUDE_DIR})
endif()
if(HAVE_CLAMDFFT)
  # Link the ocl module against clAmdFft and expose its header directory.
  list(APPEND ocl_link_libs ${CLAMDFFT_LIBRARIES})
  ocv_include_directories(${CLAMDFFT_INCLUDE_DIR})
endif()
if(HAVE_CLAMDBLAS)
  # Link the ocl module against clAmdBlas and expose its header directory.
  list(APPEND ocl_link_libs ${CLAMDBLAS_LIBRARIES})
  ocv_include_directories(${CLAMDBLAS_INCLUDE_DIR})
endif()
endif()
ocv_set_module_sources(

View File

@ -894,7 +894,35 @@ namespace cv
// Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
CV_EXPORTS void matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method, MatchTemplateBuf& buf);
#ifdef HAVE_CLAMDFFT
///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
// Lifetime management of the clAmdFft library.
// fft_setup() does nothing when the library is already started and is invoked
// implicitly the first time an FFT plan is created.
// fft_teardown() is never invoked automatically: call it once you are done with
// dft() to destroy the cached plans and shut the library down.
CV_EXPORTS void fft_setup(); // invoked implicitly on first use
CV_EXPORTS void fft_teardown(); // must be called manually by the user
/////////////////////////////////////// DFT /////////////////////////////////////////////////////
//! Performs a forward or inverse discrete Fourier transform (1D or 2D) of a floating point matrix.
//! dft_size is the size of the transform; Size(0, 0) (the default) means "use src.size()".
//!
//! For the complex-to-real transform the source matrix is assumed to be packed in CLFFT's
//! Hermitian-interleaved format (src.cols == dft_size.width / 2 + 1).
// Supported src types: CV_32FC1, CV_32FC2.
// Supported flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS.
// DFT_SCALE is not supported, and neither is a real-to-real transform.
// dft_size is the size of the original input; it is needed to recover the real
// output width of a complex-to-real transform.
// dft_size.area() must be an optimal DFT size (see getOptimalDFTSize), i.e. a product of powers of 2, 3 and 5.
// The real-to-complex transform requires clAmdFft v1.8 or later.
// The real-to-complex output layout differs from the one produced by the CPU version.
// Real-to-complex and complex-to-real transforms do not support DFT_ROWS.
CV_EXPORTS void dft(const oclMat& src, oclMat& dst, Size dft_size = Size(0, 0), int flags = 0);
#endif // HAVE_CLAMDFFT
#ifdef HAVE_CLAMDBLAS
//! Implements the generalized matrix product (GEMM) from BLAS:
//! dst = alpha * op(src1) * op(src2) + beta * src3, where op() optionally transposes
//! its argument (GEMM_1_T / GEMM_2_T). src3 may be empty, in which case it is treated as zero.
// The functionality requires the clAmdBlas library.
// The implementation dispatches on src1.type() and has branches for CV_32FC1, CV_64FC1,
// CV_32FC2 and CV_64FC2; any other type leaves dst as the copy of src3 (or zeros).
// NOTE(review): the accompanying test only exercises CV_32FC1 and CV_32FC2; the 64-bit
// branches presumably need double-precision support on the device -- confirm.
// The flag GEMM_3_T is not supported.
CV_EXPORTS void gemm(const oclMat& src1, const oclMat& src2, double alpha,
const oclMat& src3, double beta, oclMat& dst, int flags = 0);
#endif
}
}

302
modules/ocl/src/fft.cpp Normal file
View File

@ -0,0 +1,302 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include <iomanip>
#include "precomp.hpp"
#ifdef HAVE_CLAMDFFT
using namespace cv;
using namespace cv::ocl;
using namespace std;
#if !defined (HAVE_OPENCL)
void cv::ocl::dft(const oclMat& src, oclMat& dst, int flags) { throw_nogpu(); }
#else
#include <clAmdFft.h>
namespace cv{ namespace ocl {
// Kind of transform, encoded as a two-bit mask by dft():
// bit 0 is set when the input is complex, bit 1 when the output is complex.
enum FftType
{
C2R = 1, // opencl HERMITIAN_INTERLEAVED to real
R2C = 2, // real to opencl HERMITIAN_INTERLEAVED
C2C = 3 // complex to complex
};
// A baked clAmdFft plan together with the parameters it was baked for.
// Plans are created only through getPlan() and are cached in planStore until
// fft_teardown() deletes them.
struct FftPlan
{
// fft_setup()/fft_teardown() manage the static library state below.
friend void fft_setup();
friend void fft_teardown();
// Removes the plan from planStore and destroys the clAmdFft plan handle.
~FftPlan();
protected:
// Bakes a new plan; reachable only via getPlan().
FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
// Parameters the plan was baked for; getPlan() uses them as the cache key.
const Size dft_size;
const int src_step, dst_step;
const int flags;
const FftType type;
// Handle of the baked clAmdFft plan.
clAmdFftPlanHandle plHandle;
// Cache of every plan baked so far.
static vector<FftPlan*> planStore;
// True between fft_setup() and fft_teardown().
static bool started;
// Setup data allocated by fft_setup() and released by fft_teardown().
static clAmdFftSetupData * setupData;
public:
// Returns the handle of a baked plan:
// if a cached plan matches all the parameters, its handle is returned;
// otherwise a new plan is baked, stored in planStore and returned.
static clAmdFftPlanHandle getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
};
}}
// Definitions of FftPlan's static state: the library starts out "not started",
// with no setup data and an empty plan cache.
clAmdFftSetupData * cv::ocl::FftPlan::setupData = 0;
vector<cv::ocl::FftPlan*> cv::ocl::FftPlan::planStore;
bool cv::ocl::FftPlan::started = false;
void cv::ocl::fft_setup()
{
if(FftPlan::started)
{
return;
}
FftPlan::setupData = new clAmdFftSetupData;
openCLSafeCall(clAmdFftInitSetupData( FftPlan::setupData ));
FftPlan::started = true;
}
void cv::ocl::fft_teardown()
{
if(!FftPlan::started)
{
return;
}
delete FftPlan::setupData;
for(int i = 0; i < FftPlan::planStore.size(); i ++)
{
delete FftPlan::planStore[i];
}
FftPlan::planStore.clear();
openCLSafeCall( clAmdFftTeardown( ) );
FftPlan::started = false;
}
// Bakes a new clAmdFft plan.
//   _dft_size  - logical size of the transform (width x height)
//   _src_step  - row pitch of the source matrix, in bytes
//   _dst_step  - row pitch of the destination matrix, in bytes
//   _flags     - cv DFT flags; only DFT_ROWS influences the plan geometry
//   _type      - C2C, R2C or C2R
// Starts the FFT library implicitly when it has not been started yet.
// Throws std::exception for an unknown _type; DFT_ROWS is rejected for R2C/C2R.
cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type)
    : dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), flags(_flags), type(_type), plHandle(0)
{
    if(!FftPlan::started)
    {
        // implicitly do fft setup
        fft_setup();
    }
    const bool is_1d_input = (_dft_size.height == 1);
    const bool is_row_dft = (flags & DFT_ROWS) != 0;
    clAmdFftLayout inLayout;
    clAmdFftLayout outLayout;
    // A row-wise transform is a batch of 1D transforms, one per row.
    clAmdFftDim dim = is_1d_input || is_row_dft ? CLFFT_1D : CLFFT_2D;
    size_t batchSize = is_row_dft ? dft_size.height : 1;
    size_t clLengthsIn[ 3 ] = {1, 1, 1};
    size_t clStridesIn[ 3 ] = {1, 1, 1};
    size_t clStridesOut[ 3 ] = {1, 1, 1};
    clLengthsIn[0] = dft_size.width;
    clLengthsIn[1] = is_row_dft ? 1 : dft_size.height;
    clStridesIn[0] = 1;
    clStridesOut[0] = 1;
    // Row strides are expressed in elements, so divide the byte pitch by the
    // element size of the respective layout.
    switch(_type)
    {
    case C2C:
        inLayout = CLFFT_COMPLEX_INTERLEAVED;
        outLayout = CLFFT_COMPLEX_INTERLEAVED;
        clStridesIn[1] = src_step / sizeof(std::complex<float>);
        // Use the destination's own pitch rather than assuming it equals the source's.
        clStridesOut[1] = dst_step / sizeof(std::complex<float>);
        break;
    case R2C:
        CV_Assert(!is_row_dft); // this is not supported yet
        inLayout = CLFFT_REAL;
        outLayout = CLFFT_HERMITIAN_INTERLEAVED;
        clStridesIn[1] = src_step / sizeof(float);
        clStridesOut[1] = dst_step / sizeof(std::complex<float>);
        break;
    case C2R:
        CV_Assert(!is_row_dft); // this is not supported yet
        inLayout = CLFFT_HERMITIAN_INTERLEAVED;
        outLayout = CLFFT_REAL;
        clStridesIn[1] = src_step / sizeof(std::complex<float>);
        clStridesOut[1] = dst_step / sizeof(float);
        break;
    default:
        //std::runtime_error("does not support this convertion!");
        cout << "Does not support this convertion!" << endl;
        throw exception();
        break;
    }
    clStridesIn[2] = is_row_dft ? clStridesIn[1] : dft_size.width * clStridesIn[1];
    clStridesOut[2] = is_row_dft ? clStridesOut[1] : dft_size.width * clStridesOut[1];
    openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, Context::getContext()->impl->clContext, dim, clLengthsIn ) );
    openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) );
    openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) );
    openCLSafeCall( clAmdFftSetPlanBatchSize( plHandle, batchSize ) );
    openCLSafeCall( clAmdFftSetPlanInStride ( plHandle, dim, clStridesIn ) );
    openCLSafeCall( clAmdFftSetPlanOutStride ( plHandle, dim, clStridesOut ) );
    // Distance between consecutive batch items: input distance from the input
    // strides, output distance from the output strides (it was previously
    // taken from the input strides for both).
    openCLSafeCall( clAmdFftSetPlanDistance ( plHandle, clStridesIn[ dim ], clStridesOut[ dim ]) );
    openCLSafeCall( clAmdFftBakePlan( plHandle, 1, &(Context::getContext()->impl->clCmdQueue), NULL, NULL ) );
}
// Unregisters the plan from planStore (if it is still listed there) and
// destroys the underlying clAmdFft plan.
cv::ocl::FftPlan::~FftPlan()
{
    for(size_t i = 0; i < planStore.size(); i ++)
    {
        if(planStore[i]->plHandle == plHandle)
        {
            planStore.erase(planStore.begin() + i);
            // Stop here: after erase() the following elements have shifted, so
            // continuing with i + 1 would skip one entry.
            break;
        }
    }
    openCLSafeCall( clAmdFftDestroyPlan( &plHandle ) );
}
// Returns the handle of a baked plan for the given parameters, baking and
// caching a new plan when none of the cached ones matches.
clAmdFftPlanHandle cv::ocl::FftPlan::getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type)
{
    // look for a cached plan baked with exactly the same parameters
    for(std::vector<FftPlan*>::const_iterator it = planStore.begin(); it != planStore.end(); ++it)
    {
        const FftPlan * cached = *it;
        const bool same_size = cached->dft_size.width == _dft_size.width &&
                               cached->dft_size.height == _dft_size.height;
        const bool same_steps = cached->src_step == _src_step &&
                                cached->dst_step == _dst_step;
        if(same_size && same_steps && cached->flags == _flags && cached->type == _type)
        {
            return cached->plHandle;
        }
    }
    // cache miss: bake a plan and remember it
    FftPlan * baked = new FftPlan(_dft_size, _src_step, _dst_step, _flags, _type);
    planStore.push_back(baked);
    return baked->plHandle;
}
// Runs a forward or inverse DFT of src into dst using clAmdFft.
//   src      - CV_32FC1 or CV_32FC2 input
//   dst      - (re)allocated here; CV_32FC2 for C2C/R2C, CV_32FC1 for C2R
//   dft_size - transform size; Size(0,0) means src.size()
//   flags    - DFT_INVERSE, DFT_REAL_OUTPUT, DFT_ROWS (C2C only); DFT_SCALE is rejected
// The call blocks until the transform has finished on the device.
void cv::ocl::dft(const oclMat& src, oclMat& dst, Size dft_size, int flags)
{
    if(dft_size == Size(0,0))
    {
        dft_size = src.size();
    }
    // check if the given dft size is of optimal dft size
    CV_Assert(dft_size.area() == getOptimalDFTSize(dft_size.area()));
    // similar assertions with cuda module
    CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2);
    // we don't support DFT_SCALE flag
    CV_Assert(!(DFT_SCALE & flags));
    int is_row_dft = flags & DFT_ROWS;
    int is_inverse = flags & DFT_INVERSE;
    bool is_complex_input = src.channels() == 2;
    bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
    // We don't support real-to-real transform
    CV_Assert(is_complex_input || is_complex_output);
    // bit 0: complex input, bit 1: complex output (matches the FftType values)
    FftType type = (FftType)(is_complex_input << 0 | is_complex_output << 1);
    switch(type)
    {
    case C2C:
        dst.create(src.rows, src.cols, CV_32FC2);
        break;
    case R2C:
        CV_Assert(!is_row_dft); // this is not supported yet
        dst.create(src.rows, src.cols/2 + 1, CV_32FC2);
        break;
    case C2R:
        CV_Assert(dft_size.width / 2 + 1 == src.cols && dft_size.height == src.rows);
        CV_Assert(!is_row_dft); // this is not supported yet
        dst.create(src.rows, dft_size.width, CV_32FC1);
        break;
    default:
        //std::runtime_error("does not support this convertion!");
        cout << "Does not support this convertion!" << endl;
        throw exception();
        break;
    }
    clAmdFftPlanHandle plHandle = FftPlan::getPlan(dft_size, src.step, dst.step, flags, type);
    //get the buffersize
    size_t buffersize = 0;
    openCLSafeCall( clAmdFftGetTmpBufSize(plHandle, &buffersize ) );
    //allocate the intermediate buffer
    cl_mem clMedBuffer = NULL;
    if (buffersize)
    {
        cl_int medstatus;
        clMedBuffer = clCreateBuffer ( src.clCxt->impl->clContext, CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
        openCLSafeCall( medstatus );
    }
    try
    {
        openCLSafeCall( clAmdFftEnqueueTransform( plHandle,
            is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
            1,
            &src.clCxt->impl->clCmdQueue,
            0, NULL, NULL,
            (cl_mem*)&src.data, (cl_mem*)&dst.data, clMedBuffer ) );
        openCLSafeCall( clFinish(src.clCxt->impl->clCmdQueue) );
    }
    catch(...)
    {
        // do not leak the temporary buffer when the transform fails
        if(clMedBuffer)
        {
            openCLFree(clMedBuffer);
        }
        throw;
    }
    if(clMedBuffer)
    {
        openCLFree(clMedBuffer);
    }
}
#endif
#endif //HAVE_CLAMDFFT

161
modules/ocl/src/gemm.cpp Normal file
View File

@ -0,0 +1,161 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include <iomanip>
#include "precomp.hpp"
#ifdef HAVE_CLAMDBLAS
#include "clAmdBlas.h"
#if !defined (HAVE_OPENCL)
// Stub used when the library is built without OpenCL. This file implements
// gemm(), so the stub must define gemm() (it previously defined dft(), a
// copy-paste from fft.cpp, with a signature that matches no declaration).
void cv::ocl::gemm(const oclMat&, const oclMat&, double, const oclMat&, double, oclMat&, int) { throw_nogpu(); }
#else
using namespace cv;
void cv::ocl::gemm(const oclMat& src1, const oclMat& src2, double alpha,
const oclMat& src3, double beta, oclMat& dst, int flags)
{
CV_Assert(src1.cols == src2.rows &&
(src3.empty() || src1.rows == src3.rows && src2.cols == src3.cols));
CV_Assert(!(cv::GEMM_3_T & flags)); // cv::GEMM_3_T is not supported
if(!src3.empty())
{
src3.copyTo(dst);
}
else
{
dst.create(src1.rows, src2.cols, src1.type());
dst.setTo(Scalar::all(0));
}
openCLSafeCall( clAmdBlasSetup() );
const clAmdBlasTranspose transA = (cv::GEMM_1_T & flags)?clAmdBlasTrans:clAmdBlasNoTrans;
const clAmdBlasTranspose transB = (cv::GEMM_2_T & flags)?clAmdBlasTrans:clAmdBlasNoTrans;
const clAmdBlasOrder order = clAmdBlasRowMajor;
const int M = src1.rows;
const int N = src2.cols;
const int K = src1.cols;
int lda = src1.step;
int ldb = src2.step;
int ldc = dst.step;
int offa = src1.offset;
int offb = src2.offset;
int offc = dst.offset;
switch(src1.type())
{
case CV_32FC1:
lda /= sizeof(float);
ldb /= sizeof(float);
ldc /= sizeof(float);
offa /= sizeof(float);
offb /= sizeof(float);
offc /= sizeof(float);
openCLSafeCall
(
clAmdBlasSgemmEx(order, transA, transB, M, N, K,
alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
);
break;
case CV_64FC1:
lda /= sizeof(double);
ldb /= sizeof(double);
ldc /= sizeof(double);
offa /= sizeof(double);
offb /= sizeof(double);
offc /= sizeof(double);
openCLSafeCall
(
clAmdBlasDgemmEx(order, transA, transB, M, N, K,
alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
);
break;
case CV_32FC2:
{
lda /= sizeof(std::complex<float>);
ldb /= sizeof(std::complex<float>);
ldc /= sizeof(std::complex<float>);
offa /= sizeof(std::complex<float>);
offb /= sizeof(std::complex<float>);
offc /= sizeof(std::complex<float>);
cl_float2 alpha_2 = {{alpha, 0}};
cl_float2 beta_2 = {{beta, 0}};
openCLSafeCall
(
clAmdBlasCgemmEx(order, transA, transB, M, N, K,
alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
);
}
break;
case CV_64FC2:
{
lda /= sizeof(std::complex<double>);
ldb /= sizeof(std::complex<double>);
ldc /= sizeof(std::complex<double>);
offa /= sizeof(std::complex<double>);
offb /= sizeof(std::complex<double>);
offc /= sizeof(std::complex<double>);
cl_double2 alpha_2 = {{alpha, 0}};
cl_double2 beta_2 = {{beta, 0}};
openCLSafeCall
(
clAmdBlasZgemmEx(order, transA, transB, M, N, K,
alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
);
}
break;
}
clAmdBlasTeardown();
}
#endif
#endif

View File

@ -0,0 +1,97 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace std;
#ifdef HAVE_CLAMDFFT
////////////////////////////////////////////////////////////////////////////
// Dft
// Fixture for the ocl::dft tests, parameterised by the transform size and by
// whether the transform is applied row by row (DFT_ROWS).
PARAM_TEST_CASE(Dft, cv::Size, bool)
{
    cv::Size dft_size;
    bool dft_rows;
    std::vector<cv::ocl::Info> oclinfo;

    virtual void SetUp()
    {
        dft_size = GET_PARAM(0);
        dft_rows = GET_PARAM(1);
        // the tests need at least one OpenCL device
        const int devnums = getDevice(oclinfo);
        CV_Assert(devnums > 0);
    }
};
// Complex-to-complex forward transform must agree with the CPU implementation.
TEST_P(Dft, C2C)
{
    cv::Mat src = randomMat(dft_size, CV_32FC2, 0.0, 10.0);
    const int flags = dft_rows ? cv::DFT_ROWS : 0;

    cv::Mat expected;
    cv::ocl::oclMat d_dst;
    cv::dft(src, expected, flags);
    cv::ocl::dft(cv::ocl::oclMat(src), d_dst, src.size(), flags);

    EXPECT_MAT_NEAR(expected, cv::Mat(d_dst), src.size().area() * 1e-4, "");
}
// A real-to-complex transform followed by the inverse complex-to-real
// transform must reproduce the input.
TEST_P(Dft, R2CthenC2R)
{
    cv::Mat src = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
    // DFT_ROWS is not supported yet for R2C/C2R, so dft_rows is ignored here
    const int forward_flags = 0;
    const int inverse_flags = forward_flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT;

    cv::ocl::oclMat d_spectrum, d_restored;
    cv::ocl::dft(cv::ocl::oclMat(src), d_spectrum, src.size(), forward_flags);
    cv::ocl::dft(d_spectrum, d_restored, src.size(), inverse_flags);

    EXPECT_MAT_NEAR(src, d_restored, src.size().area() * 1e-4, "");
}
// Run every Dft test for two sizes, with and without DFT_ROWS.
INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft,
                        testing::Combine(
                            testing::Values(cv::Size(5, 4), cv::Size(20, 20)),
                            testing::Values(false, true)));
#endif // HAVE_CLAMDFFT

View File

@ -0,0 +1,85 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace std;
#ifdef HAVE_CLAMDBLAS
////////////////////////////////////////////////////////////////////////////
// GEMM
// Fixture for the ocl::gemm tests, parameterised by the matrix type, the
// (square) matrix size and the GEMM transposition flags.
PARAM_TEST_CASE(Gemm, int, cv::Size, int)
{
    int type;
    cv::Size mat_size;
    int flags;
    vector<cv::ocl::Info> info;

    virtual void SetUp()
    {
        type     = GET_PARAM(0);
        mat_size = GET_PARAM(1);
        flags    = GET_PARAM(2);

        cv::ocl::getDevice(info);
    }
};
// The device result of a*b + c must agree with the CPU implementation.
TEST_P(Gemm, Accuracy)
{
    cv::Mat a = randomMat(mat_size, type, 0.0, 10.0);
    cv::Mat b = randomMat(mat_size, type, 0.0, 10.0);
    cv::Mat c = randomMat(mat_size, type, 0.0, 10.0);

    cv::Mat expected;
    cv::gemm(a, b, 1.0, c, 1.0, expected, flags);

    cv::ocl::oclMat d_result;
    cv::ocl::gemm(cv::ocl::oclMat(a), cv::ocl::oclMat(b), 1.0, cv::ocl::oclMat(c), 1.0, d_result, flags);

    EXPECT_MAT_NEAR(expected, d_result, mat_size.area() * 1e-4, "");
}
// Run the Gemm test for both 32-bit types, two square sizes and every
// combination of the two supported transposition flags.
INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm,
                        testing::Combine(
                            testing::Values(CV_32FC1, CV_32FC2/*, CV_64FC1, CV_64FC2*/),
                            testing::Values(cv::Size(20, 20), cv::Size(300, 300)),
                            testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T)));
#endif