Add dft and gemm to ocl module, using AMD's clAmdFft and clAmdBlas libraries
This commit is contained in:
parent
7741d585f5
commit
c03ac12fcd
@ -140,6 +140,9 @@ OCV_OPTION(WITH_XIMEA "Include XIMEA cameras support" OFF
|
||||
OCV_OPTION(WITH_XINE "Include Xine support (GPL)" OFF IF (UNIX AND NOT APPLE AND NOT ANDROID) )
|
||||
OCV_OPTION(WITH_CLP "Include Clp support (EPL)" OFF)
|
||||
OCV_OPTION(WITH_OPENCL "Include OpenCL Runtime support" OFF IF (NOT ANDROID AND NOT IOS) )
|
||||
OCV_OPTION(WITH_OPENCLAMDFFT "Include AMD OpenCL FFT library support" OFF IF (NOT ANDROID AND NOT IOS) )
|
||||
OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" OFF IF (NOT ANDROID AND NOT IOS) )
|
||||
|
||||
|
||||
# OpenCV build components
|
||||
# ===================================================
|
||||
@ -396,6 +399,12 @@ if(WITH_OPENCL)
|
||||
if(OPENCL_FOUND)
|
||||
set(HAVE_OPENCL 1)
|
||||
endif()
|
||||
if(WITH_OPENCLAMDFFT)
|
||||
set(HAVE_CLAMDFFT 1)
|
||||
endif()
|
||||
if(WITH_OPENCLAMDBLAS)
|
||||
set(HAVE_CLAMDBLAS 1)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
|
@ -2,8 +2,19 @@ if(APPLE)
|
||||
set(OPENCL_FOUND YES)
|
||||
set(OPENCL_LIBRARIES "-framework OpenCL")
|
||||
else()
|
||||
find_package(OpenCL QUIET)
|
||||
|
||||
#find_package(OpenCL QUIET)
|
||||
if(WITH_OPENCLAMDFFT)
|
||||
find_path(CLAMDFFT_INCLUDE_DIR
|
||||
NAMES clAmdFft.h)
|
||||
find_library(CLAMDFFT_LIBRARIES
|
||||
NAMES clAmdFft.Runtime)
|
||||
endif()
|
||||
if(WITH_OPENCLAMDBLAS)
|
||||
find_path(CLAMDBLAS_INCLUDE_DIR
|
||||
NAMES clAmdBlas.h)
|
||||
find_library(CLAMDBLAS_LIBRARIES
|
||||
NAMES clAmdBlas)
|
||||
endif()
|
||||
# Try AMD/ATI Stream SDK
|
||||
if (NOT OPENCL_FOUND)
|
||||
set(ENV_AMDSTREAMSDKROOT $ENV{AMDAPPSDKROOT})
|
||||
|
@ -175,6 +175,12 @@
|
||||
/* OpenCL Support */
|
||||
#cmakedefine HAVE_OPENCL
|
||||
|
||||
/* AMD's OpenCL Fast Fourier Transform Library*/
|
||||
#cmakedefine HAVE_CLAMDFFT
|
||||
|
||||
/* AMD's Basic Linear Algebra Subprograms Library*/
|
||||
#cmakedefine HAVE_CLAMDBLAS
|
||||
|
||||
/* NVidia Cuda Fast Fourier Transform (FFT) API*/
|
||||
#cmakedefine HAVE_CUFFT
|
||||
|
||||
|
@ -29,6 +29,14 @@ if (HAVE_OPENCL)
|
||||
if(OPENCL_INCLUDE_DIR)
|
||||
ocv_include_directories(${OPENCL_INCLUDE_DIR})
|
||||
endif()
|
||||
if (HAVE_CLAMDFFT)
|
||||
set(ocl_link_libs ${ocl_link_libs} ${CLAMDFFT_LIBRARIES})
|
||||
ocv_include_directories(${CLAMDFFT_INCLUDE_DIR})
|
||||
endif()
|
||||
if (HAVE_CLAMDBLAS)
|
||||
set(ocl_link_libs ${ocl_link_libs} ${CLAMDBLAS_LIBRARIES})
|
||||
ocv_include_directories(${CLAMDBLAS_INCLUDE_DIR})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
ocv_set_module_sources(
|
||||
|
@ -894,7 +894,35 @@ namespace cv
|
||||
// Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
|
||||
CV_EXPORTS void matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method, MatchTemplateBuf& buf);
|
||||
|
||||
#ifdef HAVE_CLAMDFFT
|
||||
///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
|
||||
// the two functions must be called before/after run any fft library functions.
|
||||
CV_EXPORTS void fft_setup(); // this will be implicitly invoked
|
||||
CV_EXPORTS void fft_teardown(); // you need to teardown fft library manually
|
||||
|
||||
/////////////////////////////////////// DFT /////////////////////////////////////////////////////
|
||||
//! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
|
||||
//! Param dft_size is the size of DFT transform.
|
||||
//!
|
||||
//! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format.
|
||||
// support src type of CV32FC1, CV32FC2
|
||||
// support flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS
|
||||
// dft_size is the size of original input, which is used for transformation from complex to real.
|
||||
// dft_size must be powers of 2, 3 and 5
|
||||
// real to complex dft requires at least v1.8 clAmdFft
|
||||
// real to complex dft output is not the same with cpu version
|
||||
// real to complex and complex to real does not support DFT_ROWS
|
||||
CV_EXPORTS void dft(const oclMat& src, oclMat& dst, Size dft_size = Size(0, 0), int flags = 0);
|
||||
#endif // HAVE_CLAMDFFT
|
||||
|
||||
#ifdef HAVE_CLAMDBLAS
|
||||
//! implements generalized matrix product algorithm GEMM from BLAS
|
||||
// The functionality requires clAmdBlas library
|
||||
// only support type CV_32FC1
|
||||
// flag GEMM_3_T is not supported
|
||||
CV_EXPORTS void gemm(const oclMat& src1, const oclMat& src2, double alpha,
|
||||
const oclMat& src3, double beta, oclMat& dst, int flags = 0);
|
||||
#endif
|
||||
|
||||
}
|
||||
}
|
||||
|
302
modules/ocl/src/fft.cpp
Normal file
302
modules/ocl/src/fft.cpp
Normal file
@ -0,0 +1,302 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Peng Xiao, pengxiao@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other oclMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
#include <iomanip>
|
||||
#include "precomp.hpp"
|
||||
|
||||
#ifdef HAVE_CLAMDFFT
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::ocl;
|
||||
using namespace std;
|
||||
|
||||
#if !defined (HAVE_OPENCL)
|
||||
void cv::ocl::dft(const oclMat& src, oclMat& dst, int flags) { throw_nogpu(); }
|
||||
#else
|
||||
|
||||
#include <clAmdFft.h>
|
||||
|
||||
namespace cv{ namespace ocl {
|
||||
enum FftType
|
||||
{
|
||||
C2R = 1, // complex to complex
|
||||
R2C = 2, // real to opencl HERMITIAN_INTERLEAVED
|
||||
C2C = 3 // opencl HERMITIAN_INTERLEAVED to real
|
||||
};
|
||||
struct FftPlan
|
||||
{
|
||||
friend void fft_setup();
|
||||
friend void fft_teardown();
|
||||
~FftPlan();
|
||||
protected:
|
||||
FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
|
||||
const Size dft_size;
|
||||
const int src_step, dst_step;
|
||||
const int flags;
|
||||
const FftType type;
|
||||
clAmdFftPlanHandle plHandle;
|
||||
static vector<FftPlan*> planStore;
|
||||
static bool started;
|
||||
static clAmdFftSetupData * setupData;
|
||||
public:
|
||||
// return a baked plan->
|
||||
// if there is one matched plan, return it
|
||||
// if not, bake a new one, put it into the planStore and return it.
|
||||
static clAmdFftPlanHandle getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
|
||||
};
|
||||
}}
|
||||
bool cv::ocl::FftPlan::started = false;
|
||||
vector<cv::ocl::FftPlan*> cv::ocl::FftPlan::planStore = vector<cv::ocl::FftPlan*>();
|
||||
clAmdFftSetupData * cv::ocl::FftPlan::setupData = 0;
|
||||
|
||||
void cv::ocl::fft_setup()
|
||||
{
|
||||
if(FftPlan::started)
|
||||
{
|
||||
return;
|
||||
}
|
||||
FftPlan::setupData = new clAmdFftSetupData;
|
||||
openCLSafeCall(clAmdFftInitSetupData( FftPlan::setupData ));
|
||||
FftPlan::started = true;
|
||||
}
|
||||
void cv::ocl::fft_teardown()
|
||||
{
|
||||
if(!FftPlan::started)
|
||||
{
|
||||
return;
|
||||
}
|
||||
delete FftPlan::setupData;
|
||||
for(int i = 0; i < FftPlan::planStore.size(); i ++)
|
||||
{
|
||||
delete FftPlan::planStore[i];
|
||||
}
|
||||
FftPlan::planStore.clear();
|
||||
openCLSafeCall( clAmdFftTeardown( ) );
|
||||
FftPlan::started = false;
|
||||
}
|
||||
|
||||
// bake a new plan
|
||||
cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type)
|
||||
: dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), flags(_flags), type(_type), plHandle(0)
|
||||
{
|
||||
if(!FftPlan::started)
|
||||
{
|
||||
// implicitly do fft setup
|
||||
fft_setup();
|
||||
}
|
||||
|
||||
bool is_1d_input = (_dft_size.height == 1);
|
||||
int is_row_dft = flags & DFT_ROWS;
|
||||
int is_scaled_dft = flags & DFT_SCALE;
|
||||
int is_inverse = flags & DFT_INVERSE;
|
||||
|
||||
clAmdFftResultLocation place;
|
||||
clAmdFftLayout inLayout;
|
||||
clAmdFftLayout outLayout;
|
||||
clAmdFftDim dim = is_1d_input||is_row_dft ? CLFFT_1D : CLFFT_2D;
|
||||
|
||||
size_t batchSize = is_row_dft?dft_size.height : 1;
|
||||
size_t clLengthsIn[ 3 ] = {1, 1, 1};
|
||||
size_t clStridesIn[ 3 ] = {1, 1, 1};
|
||||
size_t clLengthsOut[ 3 ] = {1, 1, 1};
|
||||
size_t clStridesOut[ 3 ] = {1, 1, 1};
|
||||
clLengthsIn[0] = dft_size.width;
|
||||
clLengthsIn[1] = is_row_dft ? 1 : dft_size.height;
|
||||
clStridesIn[0] = 1;
|
||||
clStridesOut[0] = 1;
|
||||
|
||||
switch(_type)
|
||||
{
|
||||
case C2C:
|
||||
inLayout = CLFFT_COMPLEX_INTERLEAVED;
|
||||
outLayout = CLFFT_COMPLEX_INTERLEAVED;
|
||||
clStridesIn[1] = src_step / sizeof(std::complex<float>);
|
||||
clStridesOut[1] = clStridesIn[1];
|
||||
break;
|
||||
case R2C:
|
||||
CV_Assert(!is_row_dft); // this is not supported yet
|
||||
inLayout = CLFFT_REAL;
|
||||
outLayout = CLFFT_HERMITIAN_INTERLEAVED;
|
||||
clStridesIn[1] = src_step / sizeof(float);
|
||||
clStridesOut[1] = dst_step / sizeof(std::complex<float>);
|
||||
break;
|
||||
case C2R:
|
||||
CV_Assert(!is_row_dft); // this is not supported yet
|
||||
inLayout = CLFFT_HERMITIAN_INTERLEAVED;
|
||||
outLayout = CLFFT_REAL;
|
||||
clStridesIn[1] = src_step / sizeof(std::complex<float>);
|
||||
clStridesOut[1] = dst_step / sizeof(float);
|
||||
break;
|
||||
default:
|
||||
//std::runtime_error("does not support this convertion!");
|
||||
cout << "Does not support this convertion!" << endl;
|
||||
throw exception();
|
||||
break;
|
||||
}
|
||||
|
||||
clStridesIn[2] = is_row_dft ? clStridesIn[1] : dft_size.width * clStridesIn[1];
|
||||
clStridesOut[2] = is_row_dft ? clStridesOut[1] : dft_size.width * clStridesOut[1];
|
||||
|
||||
openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, Context::getContext()->impl->clContext, dim, clLengthsIn ) );
|
||||
|
||||
openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) );
|
||||
openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) );
|
||||
openCLSafeCall( clAmdFftSetPlanBatchSize( plHandle, batchSize ) );
|
||||
|
||||
openCLSafeCall( clAmdFftSetPlanInStride ( plHandle, dim, clStridesIn ) );
|
||||
openCLSafeCall( clAmdFftSetPlanOutStride ( plHandle, dim, clStridesOut ) );
|
||||
openCLSafeCall( clAmdFftSetPlanDistance ( plHandle, clStridesIn[ dim ], clStridesIn[ dim ]) );
|
||||
openCLSafeCall( clAmdFftBakePlan( plHandle, 1, &(Context::getContext()->impl->clCmdQueue), NULL, NULL ) );
|
||||
}
|
||||
cv::ocl::FftPlan::~FftPlan()
|
||||
{
|
||||
for(int i = 0; i < planStore.size(); i ++)
|
||||
{
|
||||
if(planStore[i]->plHandle == plHandle)
|
||||
{
|
||||
planStore.erase(planStore.begin()+ i);
|
||||
}
|
||||
}
|
||||
openCLSafeCall( clAmdFftDestroyPlan( &plHandle ) );
|
||||
}
|
||||
|
||||
clAmdFftPlanHandle cv::ocl::FftPlan::getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type)
|
||||
{
|
||||
// go through search
|
||||
for(int i = 0; i < planStore.size(); i ++)
|
||||
{
|
||||
FftPlan * plan = planStore[i];
|
||||
if(
|
||||
plan->dft_size.width == _dft_size.width &&
|
||||
plan->dft_size.height == _dft_size.height &&
|
||||
plan->flags == _flags &&
|
||||
plan->src_step == _src_step &&
|
||||
plan->dst_step == _dst_step &&
|
||||
plan->type == _type
|
||||
)
|
||||
{
|
||||
return plan->plHandle;
|
||||
}
|
||||
}
|
||||
// no baked plan is found
|
||||
FftPlan *newPlan = new FftPlan(_dft_size, _src_step, _dst_step, _flags, _type);
|
||||
planStore.push_back(newPlan);
|
||||
return newPlan->plHandle;
|
||||
}
|
||||
|
||||
void cv::ocl::dft(const oclMat& src, oclMat& dst, Size dft_size, int flags)
|
||||
{
|
||||
if(dft_size == Size(0,0))
|
||||
{
|
||||
dft_size = src.size();
|
||||
}
|
||||
// check if the given dft size is of optimal dft size
|
||||
CV_Assert(dft_size.area() == getOptimalDFTSize(dft_size.area()));
|
||||
|
||||
// similar assertions with cuda module
|
||||
CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2);
|
||||
|
||||
// we don't support DFT_SCALE flag
|
||||
CV_Assert(!(DFT_SCALE & flags));
|
||||
|
||||
bool is_1d_input = (src.rows == 1);
|
||||
int is_row_dft = flags & DFT_ROWS;
|
||||
int is_scaled_dft = flags & DFT_SCALE;
|
||||
int is_inverse = flags & DFT_INVERSE;
|
||||
bool is_complex_input = src.channels() == 2;
|
||||
bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
|
||||
|
||||
// We don't support real-to-real transform
|
||||
CV_Assert(is_complex_input || is_complex_output);
|
||||
FftType type = (FftType)(is_complex_input << 0 | is_complex_output << 1);
|
||||
|
||||
switch(type)
|
||||
{
|
||||
case C2C:
|
||||
dst.create(src.rows, src.cols, CV_32FC2);
|
||||
break;
|
||||
case R2C:
|
||||
CV_Assert(!is_row_dft); // this is not supported yet
|
||||
dst.create(src.rows, src.cols/2 + 1, CV_32FC2);
|
||||
break;
|
||||
case C2R:
|
||||
CV_Assert(dft_size.width / 2 + 1 == src.cols && dft_size.height == src.rows);
|
||||
CV_Assert(!is_row_dft); // this is not supported yet
|
||||
dst.create(src.rows, dft_size.width, CV_32FC1);
|
||||
break;
|
||||
default:
|
||||
//std::runtime_error("does not support this convertion!");
|
||||
cout << "Does not support this convertion!" << endl;
|
||||
throw exception();
|
||||
break;
|
||||
}
|
||||
clAmdFftPlanHandle plHandle = FftPlan::getPlan(dft_size, src.step, dst.step, flags, type);
|
||||
|
||||
//get the buffersize
|
||||
size_t buffersize=0;
|
||||
openCLSafeCall( clAmdFftGetTmpBufSize(plHandle, &buffersize ) );
|
||||
|
||||
//allocate the intermediate buffer
|
||||
cl_mem clMedBuffer=NULL;
|
||||
if (buffersize)
|
||||
{
|
||||
cl_int medstatus;
|
||||
clMedBuffer = clCreateBuffer ( src.clCxt->impl->clContext, CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
|
||||
openCLSafeCall( medstatus );
|
||||
}
|
||||
openCLSafeCall( clAmdFftEnqueueTransform( plHandle,
|
||||
is_inverse?CLFFT_BACKWARD:CLFFT_FORWARD,
|
||||
1,
|
||||
&src.clCxt->impl->clCmdQueue,
|
||||
0, NULL, NULL,
|
||||
(cl_mem*)&src.data, (cl_mem*)&dst.data, clMedBuffer ) );
|
||||
openCLSafeCall( clFinish(src.clCxt->impl->clCmdQueue) );
|
||||
if(clMedBuffer)
|
||||
{
|
||||
openCLFree(clMedBuffer);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif //HAVE_CLAMDFFT
|
161
modules/ocl/src/gemm.cpp
Normal file
161
modules/ocl/src/gemm.cpp
Normal file
@ -0,0 +1,161 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Peng Xiao, pengxiao@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other oclMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include <iomanip>
|
||||
#include "precomp.hpp"
|
||||
|
||||
#ifdef HAVE_CLAMDBLAS
|
||||
|
||||
#include "clAmdBlas.h"
|
||||
|
||||
#if !defined (HAVE_OPENCL)
|
||||
void cv::ocl::dft(const oclMat& src, oclMat& dst, int flags) { throw_nogpu(); }
|
||||
#else
|
||||
|
||||
using namespace cv;
|
||||
|
||||
void cv::ocl::gemm(const oclMat& src1, const oclMat& src2, double alpha,
|
||||
const oclMat& src3, double beta, oclMat& dst, int flags)
|
||||
{
|
||||
CV_Assert(src1.cols == src2.rows &&
|
||||
(src3.empty() || src1.rows == src3.rows && src2.cols == src3.cols));
|
||||
CV_Assert(!(cv::GEMM_3_T & flags)); // cv::GEMM_3_T is not supported
|
||||
if(!src3.empty())
|
||||
{
|
||||
src3.copyTo(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
dst.create(src1.rows, src2.cols, src1.type());
|
||||
dst.setTo(Scalar::all(0));
|
||||
}
|
||||
openCLSafeCall( clAmdBlasSetup() );
|
||||
|
||||
const clAmdBlasTranspose transA = (cv::GEMM_1_T & flags)?clAmdBlasTrans:clAmdBlasNoTrans;
|
||||
const clAmdBlasTranspose transB = (cv::GEMM_2_T & flags)?clAmdBlasTrans:clAmdBlasNoTrans;
|
||||
const clAmdBlasOrder order = clAmdBlasRowMajor;
|
||||
|
||||
const int M = src1.rows;
|
||||
const int N = src2.cols;
|
||||
const int K = src1.cols;
|
||||
int lda = src1.step;
|
||||
int ldb = src2.step;
|
||||
int ldc = dst.step;
|
||||
int offa = src1.offset;
|
||||
int offb = src2.offset;
|
||||
int offc = dst.offset;
|
||||
|
||||
|
||||
switch(src1.type())
|
||||
{
|
||||
case CV_32FC1:
|
||||
lda /= sizeof(float);
|
||||
ldb /= sizeof(float);
|
||||
ldc /= sizeof(float);
|
||||
offa /= sizeof(float);
|
||||
offb /= sizeof(float);
|
||||
offc /= sizeof(float);
|
||||
openCLSafeCall
|
||||
(
|
||||
clAmdBlasSgemmEx(order, transA, transB, M, N, K,
|
||||
alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
|
||||
beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
|
||||
);
|
||||
break;
|
||||
case CV_64FC1:
|
||||
lda /= sizeof(double);
|
||||
ldb /= sizeof(double);
|
||||
ldc /= sizeof(double);
|
||||
offa /= sizeof(double);
|
||||
offb /= sizeof(double);
|
||||
offc /= sizeof(double);
|
||||
openCLSafeCall
|
||||
(
|
||||
clAmdBlasDgemmEx(order, transA, transB, M, N, K,
|
||||
alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
|
||||
beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
|
||||
);
|
||||
break;
|
||||
case CV_32FC2:
|
||||
{
|
||||
lda /= sizeof(std::complex<float>);
|
||||
ldb /= sizeof(std::complex<float>);
|
||||
ldc /= sizeof(std::complex<float>);
|
||||
offa /= sizeof(std::complex<float>);
|
||||
offb /= sizeof(std::complex<float>);
|
||||
offc /= sizeof(std::complex<float>);
|
||||
cl_float2 alpha_2 = {{alpha, 0}};
|
||||
cl_float2 beta_2 = {{beta, 0}};
|
||||
openCLSafeCall
|
||||
(
|
||||
clAmdBlasCgemmEx(order, transA, transB, M, N, K,
|
||||
alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
|
||||
beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
|
||||
);
|
||||
}
|
||||
break;
|
||||
case CV_64FC2:
|
||||
{
|
||||
lda /= sizeof(std::complex<double>);
|
||||
ldb /= sizeof(std::complex<double>);
|
||||
ldc /= sizeof(std::complex<double>);
|
||||
offa /= sizeof(std::complex<double>);
|
||||
offb /= sizeof(std::complex<double>);
|
||||
offc /= sizeof(std::complex<double>);
|
||||
cl_double2 alpha_2 = {{alpha, 0}};
|
||||
cl_double2 beta_2 = {{beta, 0}};
|
||||
openCLSafeCall
|
||||
(
|
||||
clAmdBlasZgemmEx(order, transA, transB, M, N, K,
|
||||
alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
|
||||
beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
|
||||
);
|
||||
}
|
||||
break;
|
||||
}
|
||||
clAmdBlasTeardown();
|
||||
}
|
||||
#endif
|
||||
#endif
|
97
modules/ocl/test/test_fft.cpp
Normal file
97
modules/ocl/test/test_fft.cpp
Normal file
@ -0,0 +1,97 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Peng Xiao, pengxiao@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other oclMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
using namespace std;
|
||||
#ifdef HAVE_CLAMDFFT
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Dft
|
||||
PARAM_TEST_CASE(Dft, cv::Size, bool)
|
||||
{
|
||||
cv::Size dft_size;
|
||||
bool dft_rows;
|
||||
std::vector<cv::ocl::Info> oclinfo;
|
||||
virtual void SetUp()
|
||||
{
|
||||
int devnums = getDevice(oclinfo);
|
||||
CV_Assert(devnums > 0);
|
||||
dft_size = GET_PARAM(0);
|
||||
dft_rows = GET_PARAM(1);
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(Dft, C2C)
|
||||
{
|
||||
cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0);
|
||||
cv::Mat b_gold;
|
||||
int flags = 0;
|
||||
flags |= dft_rows ? cv::DFT_ROWS : 0;
|
||||
|
||||
cv::ocl::oclMat d_b;
|
||||
|
||||
cv::dft(a, b_gold, flags);
|
||||
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
|
||||
EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), a.size().area() * 1e-4, "");
|
||||
}
|
||||
|
||||
|
||||
TEST_P(Dft, R2CthenC2R)
|
||||
{
|
||||
cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
|
||||
|
||||
int flags = 0;
|
||||
//flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet
|
||||
|
||||
cv::ocl::oclMat d_b, d_c;
|
||||
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
|
||||
cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT);
|
||||
EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, "");
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine(
|
||||
testing::Values(cv::Size(5, 4), cv::Size(20, 20)),
|
||||
testing::Values(false, true)));
|
||||
|
||||
#endif // HAVE_CLAMDFFT
|
85
modules/ocl/test/test_gemm.cpp
Normal file
85
modules/ocl/test/test_gemm.cpp
Normal file
@ -0,0 +1,85 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Peng Xiao, pengxiao@multicorewareinc.com
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other oclMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
|
||||
#include "precomp.hpp"
|
||||
using namespace std;
|
||||
#ifdef HAVE_CLAMDBLAS
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// GEMM
|
||||
PARAM_TEST_CASE(Gemm, int, cv::Size, int)
|
||||
{
|
||||
int type;
|
||||
cv::Size mat_size;
|
||||
int flags;
|
||||
vector<cv::ocl::Info> info;
|
||||
virtual void SetUp()
|
||||
{
|
||||
type = GET_PARAM(0);
|
||||
mat_size = GET_PARAM(1);
|
||||
flags = GET_PARAM(2);
|
||||
cv::ocl::getDevice(info);
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(Gemm, Accuracy)
|
||||
{
|
||||
cv::Mat a = randomMat(mat_size, type, 0.0, 10.0);
|
||||
cv::Mat b = randomMat(mat_size, type, 0.0, 10.0);
|
||||
cv::Mat c = randomMat(mat_size, type, 0.0, 10.0);
|
||||
|
||||
cv::Mat dst;
|
||||
cv::ocl::oclMat ocl_dst;
|
||||
|
||||
cv::gemm(a, b, 1.0, c, 1.0, dst, flags);
|
||||
cv::ocl::gemm(cv::ocl::oclMat(a), cv::ocl::oclMat(b), 1.0, cv::ocl::oclMat(c), 1.0, ocl_dst, flags);
|
||||
|
||||
EXPECT_MAT_NEAR(dst, ocl_dst, mat_size.area() * 1e-4, "");
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
|
||||
testing::Values(CV_32FC1, CV_32FC2/*, CV_64FC1, CV_64FC2*/),
|
||||
testing::Values(cv::Size(20, 20), cv::Size(300, 300)),
|
||||
testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T)));
|
||||
#endif
|
Loading…
x
Reference in New Issue
Block a user