format files to ANSI C style with coolformat
Change the download channels to oclchannels(); fix bugs in the arithm functions; perf fix for bilateral; bug fix for the split test case; add build_warps functions.
This commit is contained in:
@@ -51,111 +51,114 @@
|
||||
#include "clAmdBlas.h"
|
||||
|
||||
#if !defined (HAVE_OPENCL)
|
||||
// Stub compiled when HAVE_OPENCL is not defined (one-line spelling): ignores its arguments and calls throw_nogpu().
void cv::ocl::dft(const oclMat& src, oclMat& dst, int flags) { throw_nogpu(); }
|
||||
// Stub compiled when HAVE_OPENCL is not defined (reformatted spelling):
// src, dst and flags are unused; the body only calls throw_nogpu().
void cv::ocl::dft(const oclMat &src, oclMat &dst, int flags)
|
||||
{
|
||||
throw_nogpu();
|
||||
}
|
||||
#else
|
||||
|
||||
using namespace cv;
|
||||
|
||||
// cv::ocl::gemm -- matrix multiply on oclMat operands through clAmdBlas.
//
// This span shows the definition twice: first with the `oclMat& x` spelling,
// then with the reformatted `oclMat &x` spelling. The two copies are
// interleaved hunk by hunk (header, header, body, body).
//
// What the code does:
//   1. asserts that the operand dimensions are compatible and that GEMM_3_T
//      is not requested;
//   2. seeds dst with a copy of src3, or with zeros when src3 is empty;
//   3. calls the clAmdBlas{S,D,C,Z}gemmEx routine selected by src1.type(),
//      bracketed by clAmdBlasSetup() / clAmdBlasTeardown().
//
// Parameters:
//   src1, src2  input matrices; src1.cols must equal src2.rows.
//   alpha, beta scalars forwarded to the BLAS call (presumably
//               dst = alpha*op(src1)*op(src2) + beta*dst per the usual GEMM
//               convention -- confirm against the clAmdBlas documentation).
//   src3        optional addend; when non-empty it must be src1.rows x src2.cols.
//   dst         output matrix, overwritten/created as described above.
//   flags       GEMM_1_T / GEMM_2_T select clAmdBlasTrans for the first /
//               second operand; GEMM_3_T is rejected by an assertion.
//
// NOTE(review): the visible asserts do not compare src2/src3 types with
// src1.type(), although src1.type() alone selects the BLAS routine.
void cv::ocl::gemm(const oclMat& src1, const oclMat& src2, double alpha,
|
||||
const oclMat& src3, double beta, oclMat& dst, int flags)
|
||||
{
|
||||
// The second operand of || parses as (src1.rows == src3.rows && src2.cols == src3.cols)
// because && binds tighter than ||.
CV_Assert(src1.cols == src2.rows &&
|
||||
(src3.empty() || src1.rows == src3.rows && src2.cols == src3.cols));
|
||||
CV_Assert(!(cv::GEMM_3_T & flags)); // cv::GEMM_3_T is not supported
|
||||
// Seed dst: copy of src3 when given, otherwise a zero-filled matrix of src1's type.
if(!src3.empty())
|
||||
{
|
||||
src3.copyTo(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
dst.create(src1.rows, src2.cols, src1.type());
|
||||
dst.setTo(Scalar::all(0));
|
||||
}
|
||||
openCLSafeCall( clAmdBlasSetup() );
|
||||
|
||||
const clAmdBlasTranspose transA = (cv::GEMM_1_T & flags)?clAmdBlasTrans:clAmdBlasNoTrans;
|
||||
const clAmdBlasTranspose transB = (cv::GEMM_2_T & flags)?clAmdBlasTrans:clAmdBlasNoTrans;
|
||||
const clAmdBlasOrder order = clAmdBlasRowMajor;
|
||||
// Reformatted copy of the same header follows (same statements, `oclMat &` spelling).
void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
|
||||
const oclMat &src3, double beta, oclMat &dst, int flags)
|
||||
{
|
||||
CV_Assert(src1.cols == src2.rows &&
|
||||
(src3.empty() || src1.rows == src3.rows && src2.cols == src3.cols));
|
||||
CV_Assert(!(cv::GEMM_3_T & flags)); // cv::GEMM_3_T is not supported
|
||||
if(!src3.empty())
|
||||
{
|
||||
src3.copyTo(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
dst.create(src1.rows, src2.cols, src1.type());
|
||||
dst.setTo(Scalar::all(0));
|
||||
}
|
||||
openCLSafeCall( clAmdBlasSetup() );
|
||||
|
||||
// Problem sizes and raw step/offset values; the latter are divided by the
// element size inside the switch before being handed to clAmdBlas.
// NOTE(review): M, N and K are read from src1.rows / src2.cols / src1.cols
// regardless of transA / transB -- confirm this is intended when
// GEMM_1_T or GEMM_2_T is set.
const int M = src1.rows;
|
||||
const int N = src2.cols;
|
||||
const int K = src1.cols;
|
||||
int lda = src1.step;
|
||||
int ldb = src2.step;
|
||||
int ldc = dst.step;
|
||||
int offa = src1.offset;
|
||||
int offb = src2.offset;
|
||||
int offc = dst.offset;
|
||||
const clAmdBlasTranspose transA = (cv::GEMM_1_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans;
|
||||
const clAmdBlasTranspose transB = (cv::GEMM_2_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans;
|
||||
const clAmdBlasOrder order = clAmdBlasRowMajor;
|
||||
|
||||
const int M = src1.rows;
|
||||
const int N = src2.cols;
|
||||
const int K = src1.cols;
|
||||
int lda = src1.step;
|
||||
int ldb = src2.step;
|
||||
int ldc = dst.step;
|
||||
int offa = src1.offset;
|
||||
int offb = src2.offset;
|
||||
int offc = dst.offset;
|
||||
|
||||
|
||||
// Dispatch on the element type of src1. Each case divides the leading
// dimensions and offsets by sizeof(element) (presumably converting bytes to
// element counts -- confirm) and calls the matching *gemmEx routine on
// src1.clCxt's command queue.
// NOTE(review): there is no default case, so any other src1.type() makes no
// BLAS call and dst keeps the contents it was seeded with above.
// NOTE(review): if openCLSafeCall can throw, clAmdBlasTeardown() after the
// switch is not reached -- confirm.
switch(src1.type())
|
||||
{
|
||||
case CV_32FC1:
|
||||
lda /= sizeof(float);
|
||||
ldb /= sizeof(float);
|
||||
ldc /= sizeof(float);
|
||||
offa /= sizeof(float);
|
||||
offb /= sizeof(float);
|
||||
offc /= sizeof(float);
|
||||
openCLSafeCall
|
||||
(
|
||||
clAmdBlasSgemmEx(order, transA, transB, M, N, K,
|
||||
alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
|
||||
beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
|
||||
);
|
||||
break;
|
||||
case CV_64FC1:
|
||||
lda /= sizeof(double);
|
||||
ldb /= sizeof(double);
|
||||
ldc /= sizeof(double);
|
||||
offa /= sizeof(double);
|
||||
offb /= sizeof(double);
|
||||
offc /= sizeof(double);
|
||||
openCLSafeCall
|
||||
(
|
||||
clAmdBlasDgemmEx(order, transA, transB, M, N, K,
|
||||
alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
|
||||
beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
|
||||
);
|
||||
break;
|
||||
case CV_32FC2:
|
||||
{
|
||||
lda /= sizeof(std::complex<float>);
|
||||
ldb /= sizeof(std::complex<float>);
|
||||
ldc /= sizeof(std::complex<float>);
|
||||
offa /= sizeof(std::complex<float>);
|
||||
offb /= sizeof(std::complex<float>);
|
||||
offc /= sizeof(std::complex<float>);
|
||||
// alpha/beta go into the first component; the second component is 0.
cl_float2 alpha_2 = {{alpha, 0}};
|
||||
cl_float2 beta_2 = {{beta, 0}};
|
||||
openCLSafeCall
|
||||
(
|
||||
clAmdBlasCgemmEx(order, transA, transB, M, N, K,
|
||||
alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
|
||||
beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
|
||||
);
|
||||
}
|
||||
break;
|
||||
case CV_64FC2:
|
||||
{
|
||||
lda /= sizeof(std::complex<double>);
|
||||
ldb /= sizeof(std::complex<double>);
|
||||
ldc /= sizeof(std::complex<double>);
|
||||
offa /= sizeof(std::complex<double>);
|
||||
offb /= sizeof(std::complex<double>);
|
||||
offc /= sizeof(std::complex<double>);
|
||||
cl_double2 alpha_2 = {{alpha, 0}};
|
||||
cl_double2 beta_2 = {{beta, 0}};
|
||||
openCLSafeCall
|
||||
(
|
||||
clAmdBlasZgemmEx(order, transA, transB, M, N, K,
|
||||
alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
|
||||
beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
|
||||
);
|
||||
}
|
||||
break;
|
||||
}
|
||||
clAmdBlasTeardown();
|
||||
}
|
||||
// Reformatted copy of the same switch follows; the statements match the copy above.
switch(src1.type())
|
||||
{
|
||||
case CV_32FC1:
|
||||
lda /= sizeof(float);
|
||||
ldb /= sizeof(float);
|
||||
ldc /= sizeof(float);
|
||||
offa /= sizeof(float);
|
||||
offb /= sizeof(float);
|
||||
offc /= sizeof(float);
|
||||
openCLSafeCall
|
||||
(
|
||||
clAmdBlasSgemmEx(order, transA, transB, M, N, K,
|
||||
alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
|
||||
beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
|
||||
);
|
||||
break;
|
||||
case CV_64FC1:
|
||||
lda /= sizeof(double);
|
||||
ldb /= sizeof(double);
|
||||
ldc /= sizeof(double);
|
||||
offa /= sizeof(double);
|
||||
offb /= sizeof(double);
|
||||
offc /= sizeof(double);
|
||||
openCLSafeCall
|
||||
(
|
||||
clAmdBlasDgemmEx(order, transA, transB, M, N, K,
|
||||
alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
|
||||
beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
|
||||
);
|
||||
break;
|
||||
case CV_32FC2:
|
||||
{
|
||||
lda /= sizeof(std::complex<float>);
|
||||
ldb /= sizeof(std::complex<float>);
|
||||
ldc /= sizeof(std::complex<float>);
|
||||
offa /= sizeof(std::complex<float>);
|
||||
offb /= sizeof(std::complex<float>);
|
||||
offc /= sizeof(std::complex<float>);
|
||||
cl_float2 alpha_2 = {{alpha, 0}};
|
||||
cl_float2 beta_2 = {{beta, 0}};
|
||||
openCLSafeCall
|
||||
(
|
||||
clAmdBlasCgemmEx(order, transA, transB, M, N, K,
|
||||
alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
|
||||
beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
|
||||
);
|
||||
}
|
||||
break;
|
||||
case CV_64FC2:
|
||||
{
|
||||
lda /= sizeof(std::complex<double>);
|
||||
ldb /= sizeof(std::complex<double>);
|
||||
ldc /= sizeof(std::complex<double>);
|
||||
offa /= sizeof(std::complex<double>);
|
||||
offb /= sizeof(std::complex<double>);
|
||||
offc /= sizeof(std::complex<double>);
|
||||
cl_double2 alpha_2 = {{alpha, 0}};
|
||||
cl_double2 beta_2 = {{beta, 0}};
|
||||
openCLSafeCall
|
||||
(
|
||||
clAmdBlasZgemmEx(order, transA, transB, M, N, K,
|
||||
alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
|
||||
beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
|
||||
);
|
||||
}
|
||||
break;
|
||||
}
|
||||
clAmdBlasTeardown();
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user