ocl: split initialization.cpp into 3 files: context, operations, programcache

This commit is contained in:
Alexander Alekhin
2013-09-20 19:19:52 +04:00
parent 8e75947a7d
commit e8d9ed8955
36 changed files with 1699 additions and 1534 deletions

View File

@@ -51,50 +51,10 @@
//M*/
#include "precomp.hpp"
#include <iomanip>
#include "opencl_kernels.hpp"
using namespace cv;
using namespace cv::ocl;
using namespace std;
namespace cv
{
namespace ocl
{
//////////////////////////////// OpenCL kernel strings /////////////////////
extern const char *arithm_absdiff_nonsaturate;
extern const char *arithm_nonzero;
extern const char *arithm_sum;
extern const char *arithm_minMax;
extern const char *arithm_minMaxLoc;
extern const char *arithm_minMaxLoc_mask;
extern const char *arithm_LUT;
extern const char *arithm_add;
extern const char *arithm_add_mask;
extern const char *arithm_add_scalar;
extern const char *arithm_add_scalar_mask;
extern const char *arithm_bitwise_binary;
extern const char *arithm_bitwise_binary_mask;
extern const char *arithm_bitwise_binary_scalar;
extern const char *arithm_bitwise_binary_scalar_mask;
extern const char *arithm_bitwise_not;
extern const char *arithm_compare;
extern const char *arithm_transpose;
extern const char *arithm_flip;
extern const char *arithm_flip_rc;
extern const char *arithm_magnitude;
extern const char *arithm_cartToPolar;
extern const char *arithm_polarToCart;
extern const char *arithm_exp;
extern const char *arithm_log;
extern const char *arithm_addWeighted;
extern const char *arithm_phase;
extern const char *arithm_pow;
extern const char *arithm_setidentity;
}
}
//////////////////////////////////////////////////////////////////////////////
/////////////////////// add subtract multiply divide /////////////////////////
@@ -106,7 +66,7 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const
oclMat &dst, int op_type, bool use_scalar = false)
{
Context *clCxt = src1.clCxt;
bool hasDouble = clCxt->supportsFeature(Context::CL_DOUBLE);
bool hasDouble = clCxt->supportsFeature(FEATURE_CL_DOUBLE);
if (!hasDouble && (src1.depth() == CV_64F || src2.depth() == CV_64F || dst.depth() == CV_64F))
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
@@ -264,7 +224,7 @@ void cv::ocl::absdiff(const oclMat &src1, const Scalar &src2, oclMat &dst)
//////////////////////////////////////////////////////////////////////////////
static void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, int cmpOp,
string kernelName, const char **kernelString)
string kernelName, const cv::ocl::ProgramEntry* source)
{
CV_Assert(src1.type() == src2.type());
dst.create(src1.size(), CV_8UC1);
@@ -295,13 +255,13 @@ static void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, int
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows ));
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads,
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads,
args, -1, -1, buildOptions.c_str());
}
void cv::ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst , int cmpOp)
{
if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.depth() == CV_64F)
if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
@@ -358,7 +318,7 @@ Scalar arithmetic_sum(const oclMat &src, int type, int ddepth)
{
CV_Assert(src.step % src.elemSize() == 0);
size_t groupnum = src.clCxt->computeUnits();
size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits;
CV_Assert(groupnum != 0);
int dbsize = groupnum * src.oclchannels();
@@ -385,7 +345,7 @@ typedef Scalar (*sumFunc)(const oclMat &src, int type, int ddepth);
Scalar cv::ocl::sum(const oclMat &src)
{
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
}
@@ -396,7 +356,7 @@ Scalar cv::ocl::sum(const oclMat &src)
arithmetic_sum<double>
};
bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE);
bool hasDouble = src.clCxt->supportsFeature(FEATURE_CL_DOUBLE);
int ddepth = std::max(src.depth(), CV_32S);
if (!hasDouble && ddepth == CV_64F)
ddepth = CV_32F;
@@ -407,7 +367,7 @@ Scalar cv::ocl::sum(const oclMat &src)
Scalar cv::ocl::absSum(const oclMat &src)
{
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
}
@@ -418,7 +378,7 @@ Scalar cv::ocl::absSum(const oclMat &src)
arithmetic_sum<double>
};
bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE);
bool hasDouble = src.clCxt->supportsFeature(FEATURE_CL_DOUBLE);
int ddepth = std::max(src.depth(), CV_32S);
if (!hasDouble && ddepth == CV_64F)
ddepth = CV_32F;
@@ -429,7 +389,7 @@ Scalar cv::ocl::absSum(const oclMat &src)
Scalar cv::ocl::sqrSum(const oclMat &src)
{
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
}
@@ -440,7 +400,7 @@ Scalar cv::ocl::sqrSum(const oclMat &src)
arithmetic_sum<double>
};
bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE);
bool hasDouble = src.clCxt->supportsFeature(FEATURE_CL_DOUBLE);
int ddepth = src.depth() <= CV_32S ? CV_32S : (hasDouble ? CV_64F : CV_32F);
sumFunc func = functab[ddepth - CV_32S];
@@ -524,7 +484,7 @@ template <typename T, typename WT>
void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal,
const oclMat &mask, oclMat &buf)
{
size_t groupnum = src.clCxt->computeUnits();
size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits;
CV_Assert(groupnum != 0);
int dbsize = groupnum * 2 * src.elemSize();
@@ -566,7 +526,7 @@ void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, cons
if (minVal == NULL && maxVal == NULL)
return;
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
}
@@ -699,7 +659,7 @@ double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType)
static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kernelName)
{
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
@@ -746,7 +706,7 @@ static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kern
static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kernelName, bool isVertical)
{
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
@@ -792,9 +752,9 @@ static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kern
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
const char **kernelString = isVertical ? &arithm_flip_rc : &arithm_flip;
const cv::ocl::ProgramEntry* source = isVertical ? &arithm_flip_rc : &arithm_flip;
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, src.oclchannels(), depth);
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, src.oclchannels(), depth);
}
void cv::ocl::flip(const oclMat &src, oclMat &dst, int flipCode)
@@ -860,10 +820,10 @@ void cv::ocl::LUT(const oclMat &src, const oclMat &lut, oclMat &dst)
//////////////////////////////// exp log /////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernelName, const char **kernelString)
static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source)
{
Context *clCxt = src.clCxt;
if (!clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
@@ -893,7 +853,7 @@ static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernel
args.push_back( make_pair( sizeof(cl_int), (void *)&srcstep1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dststep1 ));
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads,
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads,
args, src.oclchannels(), -1, buildOptions.c_str());
}
@@ -913,7 +873,7 @@ void cv::ocl::log(const oclMat &src, oclMat &dst)
static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName)
{
if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
@@ -955,9 +915,9 @@ void cv::ocl::magnitude(const oclMat &src1, const oclMat &src2, oclMat &dst)
arithmetic_magnitude_phase_run(src1, src2, dst, "arithm_magnitude");
}
static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString)
static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source)
{
if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
@@ -985,7 +945,7 @@ static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat
args.push_back( make_pair( sizeof(cl_int), (void *)&cols1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth);
}
void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle, bool angleInDegrees)
@@ -1004,7 +964,7 @@ void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle, bool angleI
static void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, oclMat &dst_mag, oclMat &dst_cart,
string kernelName, bool angleInDegrees)
{
if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
@@ -1057,7 +1017,7 @@ void cv::ocl::cartToPolar(const oclMat &x, const oclMat &y, oclMat &mag, oclMat
static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &dst1, oclMat &dst2, bool angleInDegrees,
string kernelName)
{
if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
@@ -1176,7 +1136,7 @@ void arithmetic_minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
Point *minLoc, Point *maxLoc, const oclMat &mask)
{
CV_Assert(src.oclchannels() == 1);
size_t groupnum = src.clCxt->computeUnits();
size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits;
CV_Assert(groupnum != 0);
int minloc = -1 , maxloc = -1;
int vlen = 4, dbsize = groupnum * vlen * 4 * sizeof(T) ;
@@ -1238,7 +1198,7 @@ typedef void (*minMaxLocFunc)(const oclMat &src, double *minVal, double *maxVal,
void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
Point *minLoc, Point *maxLoc, const oclMat &mask)
{
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
return;
@@ -1251,7 +1211,7 @@ void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
};
minMaxLocFunc func;
func = functab[(int)src.clCxt->supportsFeature(Context::CL_DOUBLE)];
func = functab[(int)src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)];
func(src, minVal, maxVal, minLoc, maxLoc, mask);
}
@@ -1296,7 +1256,7 @@ int cv::ocl::countNonZero(const oclMat &src)
CV_Assert(src.channels() == 1);
Context *clCxt = src.clCxt;
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "selected device doesn't support double");
}
@@ -1327,7 +1287,7 @@ int cv::ocl::countNonZero(const oclMat &src)
////////////////////////////////bitwise_op////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
static void bitwise_unary_run(const oclMat &src1, oclMat &dst, string kernelName, const char **kernelString)
static void bitwise_unary_run(const oclMat &src1, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source)
{
dst.create(src1.size(), src1.type());
@@ -1361,7 +1321,7 @@ static void bitwise_unary_run(const oclMat &src1, oclMat &dst, string kernelName
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth);
}
enum { AND = 0, OR, XOR };
@@ -1370,7 +1330,7 @@ static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Sca
oclMat &dst, int operationType)
{
Context *clCxt = src1.clCxt;
if (!clCxt->supportsFeature(Context::CL_DOUBLE) && src1.depth() == CV_64F)
if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F)
{
cout << "Selected device does not support double" << endl;
return;
@@ -1442,7 +1402,7 @@ static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Sca
void cv::ocl::bitwise_not(const oclMat &src, oclMat &dst)
{
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F)
{
cout << "Selected device does not support double" << endl;
return;
@@ -1571,7 +1531,7 @@ oclMatExpr::operator oclMat() const
static void transpose_run(const oclMat &src, oclMat &dst, string kernelName, bool inplace = false)
{
Context *clCxt = src.clCxt;
if (!clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
@@ -1623,7 +1583,7 @@ void cv::ocl::transpose(const oclMat &src, oclMat &dst)
void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2, double beta, double gama, oclMat &dst)
{
Context *clCxt = src1.clCxt;
bool hasDouble = clCxt->supportsFeature(Context::CL_DOUBLE);
bool hasDouble = clCxt->supportsFeature(FEATURE_CL_DOUBLE);
if (!hasDouble && src1.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
@@ -1688,7 +1648,7 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2,
/////////////////////////////////// Pow //////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string kernelName, const char **kernelString)
static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source)
{
CV_Assert(src1.cols == dst.cols && src1.rows == dst.rows);
CV_Assert(src1.type() == dst.type());
@@ -1718,17 +1678,17 @@ static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
float pf = static_cast<float>(p);
if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE))
if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
args.push_back( make_pair( sizeof(cl_float), (void *)&pf ));
else
args.push_back( make_pair( sizeof(cl_double), (void *)&p ));
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth);
}
void cv::ocl::pow(const oclMat &x, double p, oclMat &y)
{
if (!x.clCxt->supportsFeature(Context::CL_DOUBLE) && x.type() == CV_64F)
if (!x.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && x.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;