catching OpenCL double not supported exceptions

This commit is contained in:
Ilya Lavrenov
2013-10-09 18:05:09 +04:00
parent fccd37de7e
commit 9d1636daa6
33 changed files with 399 additions and 409 deletions

View File

@@ -69,7 +69,7 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const
bool hasDouble = clCxt->supportsFeature(FEATURE_CL_DOUBLE);
if (!hasDouble && (src1.depth() == CV_64F || src2.depth() == CV_64F || dst.depth() == CV_64F))
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}
@@ -242,9 +242,7 @@ void cv::ocl::absdiff(const oclMat &src1, const Scalar &src2, oclMat &dst)
static void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, int cmpOp,
string kernelName, const cv::ocl::ProgramEntry* source)
{
CV_Assert(src1.type() == src2.type());
dst.create(src1.size(), CV_8UC1);
Context *clCxt = src1.clCxt;
int depth = src1.depth();
size_t localThreads[3] = { 64, 4, 1 };
@@ -271,7 +269,7 @@ static void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, int
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows ));
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads,
openCLExecuteKernel(src1.clCxt, source, kernelName, globalThreads, localThreads,
args, -1, -1, buildOptions.c_str());
}
@@ -279,11 +277,11 @@ void cv::ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst , int
{
if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F)
{
cout << "Selected device do not support double" << endl;
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}
CV_Assert(src1.channels() == 1 && src2.channels() == 1);
CV_Assert(src1.type() == src2.type() && src1.channels() == 1);
CV_Assert(cmpOp >= CMP_EQ && cmpOp <= CMP_NE);
compare_run(src1, src2, dst, cmpOp, "arithm_compare", &arithm_compare);
@@ -363,7 +361,7 @@ Scalar cv::ocl::sum(const oclMat &src)
{
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return Scalar::all(0);
}
static sumFunc functab[3] =
@@ -382,7 +380,7 @@ Scalar cv::ocl::absSum(const oclMat &src)
{
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return cv::Scalar::all(0);
}
@@ -402,7 +400,7 @@ Scalar cv::ocl::sqrSum(const oclMat &src)
{
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return cv::Scalar::all(0);
}
static sumFunc functab[3] =
@@ -412,7 +410,7 @@ Scalar cv::ocl::sqrSum(const oclMat &src)
arithmetic_sum<double>
};
int ddepth = src.depth() <= CV_32S ? CV_32S : CV_64F;
int ddepth = std::max(src.depth(), CV_32S);
sumFunc func = functab[ddepth - CV_32S];
return func(src, SQR_SUM, ddepth);
}
@@ -423,6 +421,12 @@ Scalar cv::ocl::sqrSum(const oclMat &src)
void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev)
{
if (src.depth() == CV_64F && !src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
{
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}
double total = 1.0 / src.size().area();
mean = sum(src);
@@ -455,7 +459,8 @@ static void arithmetic_minMax_run(const oclMat &src, const oclMat & mask, cl_mem
ostringstream stream;
stream << "-D T=" << typeMap[src.depth()] << channelMap[src.channels()];
stream << " -D MAX_VAL=" << (WT)numeric_limits<T>::max();
stream << " -D MIN_VAL=" << (WT)numeric_limits<T>::min();
stream << " -D MIN_VAL=" << (numeric_limits<T>::is_integer ?
(WT)numeric_limits<T>::min() : -(WT)(std::numeric_limits<T>::max()));
string buildOptions = stream.str();
vector<pair<size_t , const void *> > args;
@@ -532,7 +537,7 @@ void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oc
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}
@@ -566,8 +571,13 @@ double cv::ocl::norm(const oclMat &src1, int normType)
static void arithm_absdiff_nonsaturate_run(const oclMat & src1, const oclMat & src2, oclMat & diff, int ntype)
{
CV_Assert(src1.step % src1.elemSize() == 0 && (src2.empty() || src2.step % src2.elemSize() == 0));
Context *clCxt = src1.clCxt;
if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F)
{
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}
CV_Assert(src1.step % src1.elemSize() == 0 && (src2.empty() || src2.step % src2.elemSize() == 0));
int ddepth = std::max(src1.depth(), CV_32S);
if (ntype == NORM_L2)
@@ -621,13 +631,12 @@ static void arithm_absdiff_nonsaturate_run(const oclMat & src1, const oclMat & s
double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType)
{
CV_Assert(!src1.empty());
CV_Assert(src2.empty() || (src1.type() == src2.type() && src1.size() == src2.size()));
if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return -1;
}
CV_Assert(src2.empty() || (src1.type() == src2.type() && src1.size() == src2.size()));
bool isRelative = (normType & NORM_RELATIVE) != 0;
normType &= NORM_TYPE_MASK;
@@ -670,17 +679,6 @@ double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType)
static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kernelName)
{
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
}
CV_Assert(src.cols == dst.cols && src.rows == dst.rows);
CV_Assert(src.type() == dst.type());
Context *clCxt = src.clCxt;
int channels = dst.oclchannels();
int depth = dst.depth();
@@ -712,21 +710,11 @@ static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kern
args.push_back( make_pair( sizeof(cl_int), (void *)&rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
openCLExecuteKernel(clCxt, &arithm_flip, kernelName, globalThreads, localThreads, args, -1, depth);
openCLExecuteKernel(src.clCxt, &arithm_flip, kernelName, globalThreads, localThreads, args, -1, depth);
}
static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kernelName, bool isVertical)
{
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
}
CV_Assert(src.cols == dst.cols && src.rows == dst.rows);
CV_Assert(src.type() == dst.type());
Context *clCxt = src.clCxt;
int channels = dst.oclchannels();
int depth = dst.depth();
@@ -765,16 +753,21 @@ static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kern
const cv::ocl::ProgramEntry* source = isVertical ? &arithm_flip_rc : &arithm_flip;
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, src.oclchannels(), depth);
openCLExecuteKernel(src.clCxt, source, kernelName, globalThreads, localThreads, args, src.oclchannels(), depth);
}
void cv::ocl::flip(const oclMat &src, oclMat &dst, int flipCode)
{
dst.create(src.size(), src.type());
if (flipCode == 0)
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
arithmetic_flip_rows_run(src, dst, "arithm_flip_rows");
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}
dst.create(src.size(), src.type());
if (flipCode == 0)
arithmetic_flip_rows_run(src, dst, "arithm_flip_rows");
else if (flipCode > 0)
arithmetic_flip_cols_run(src, dst, "arithm_flip_cols", false);
else
@@ -787,7 +780,6 @@ void cv::ocl::flip(const oclMat &src, oclMat &dst, int flipCode)
static void arithmetic_lut_run(const oclMat &src, const oclMat &lut, oclMat &dst, string kernelName)
{
Context *clCxt = src.clCxt;
int sdepth = src.depth();
int src_step1 = src.step1(), dst_step1 = dst.step1();
int src_offset1 = src.offset / src.elemSize1(), dst_offset1 = dst.offset / dst.elemSize1();
@@ -812,19 +804,26 @@ static void arithmetic_lut_run(const oclMat &src, const oclMat &lut, oclMat &dst
args.push_back( make_pair( sizeof(cl_int), (void *)&src_step1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
openCLExecuteKernel(clCxt, &arithm_LUT, kernelName, globalSize, localSize,
openCLExecuteKernel(src.clCxt, &arithm_LUT, kernelName, globalSize, localSize,
args, lut.oclchannels(), -1, buildOptions.c_str());
}
void cv::ocl::LUT(const oclMat &src, const oclMat &lut, oclMat &dst)
{
if (!lut.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && lut.depth() == CV_64F)
{
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}
int cn = src.channels(), depth = src.depth();
CV_Assert(depth == CV_8U || depth == CV_8S);
CV_Assert(lut.channels() == 1 || lut.channels() == src.channels());
CV_Assert(lut.rows == 1 && lut.cols == 256);
dst.create(src.size(), CV_MAKETYPE(lut.depth(), cn));
string kernelName = "LUT";
arithmetic_lut_run(src, lut, dst, kernelName);
arithmetic_lut_run(src, lut, dst, "LUT");
}
//////////////////////////////////////////////////////////////////////////////
@@ -836,7 +835,7 @@ static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernel
Context *clCxt = src.clCxt;
if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}
@@ -884,13 +883,6 @@ void cv::ocl::log(const oclMat &src, oclMat &dst)
static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName)
{
if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
}
Context *clCxt = src1.clCxt;
int channels = dst.oclchannels();
int depth = dst.depth();
@@ -914,11 +906,17 @@ static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
openCLExecuteKernel(clCxt, &arithm_magnitude, kernelName, globalThreads, localThreads, args, -1, depth);
openCLExecuteKernel(src1.clCxt, &arithm_magnitude, kernelName, globalThreads, localThreads, args, -1, depth);
}
void cv::ocl::magnitude(const oclMat &src1, const oclMat &src2, oclMat &dst)
{
if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F)
{
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}
CV_Assert(src1.type() == src2.type() && src1.size() == src2.size() &&
(src1.depth() == CV_32F || src1.depth() == CV_64F));
@@ -928,13 +926,6 @@ void cv::ocl::magnitude(const oclMat &src1, const oclMat &src2, oclMat &dst)
static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source)
{
if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
}
Context *clCxt = src1.clCxt;
int depth = dst.depth(), cols1 = src1.cols * src1.oclchannels();
int src1step1 = src1.step / src1.elemSize1(), src1offset1 = src1.offset / src1.elemSize1();
int src2step1 = src2.step / src2.elemSize1(), src2offset1 = src2.offset / src2.elemSize1();
@@ -956,11 +947,17 @@ static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat
args.push_back( make_pair( sizeof(cl_int), (void *)&cols1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth);
openCLExecuteKernel(src1.clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth);
}
void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle, bool angleInDegrees)
{
if (!x.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && x.depth() == CV_64F)
{
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}
CV_Assert(x.type() == y.type() && x.size() == y.size() && (x.depth() == CV_32F || x.depth() == CV_64F));
CV_Assert(x.step % x.elemSize() == 0 && y.step % y.elemSize() == 0);
@@ -975,13 +972,6 @@ void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle, bool angleI
static void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, oclMat &dst_mag, oclMat &dst_cart,
string kernelName, bool angleInDegrees)
{
if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
}
Context *clCxt = src1.clCxt;
int channels = src1.oclchannels();
int depth = src1.depth();
@@ -1008,11 +998,17 @@ static void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, o
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&tmp ));
openCLExecuteKernel(clCxt, &arithm_cartToPolar, kernelName, globalThreads, localThreads, args, -1, depth);
openCLExecuteKernel(src1.clCxt, &arithm_cartToPolar, kernelName, globalThreads, localThreads, args, -1, depth);
}
void cv::ocl::cartToPolar(const oclMat &x, const oclMat &y, oclMat &mag, oclMat &angle, bool angleInDegrees)
{
if (!x.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && x.depth() == CV_64F)
{
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}
CV_Assert(x.type() == y.type() && x.size() == y.size() && (x.depth() == CV_32F || x.depth() == CV_64F));
mag.create(x.size(), x.type());
@@ -1028,13 +1024,6 @@ void cv::ocl::cartToPolar(const oclMat &x, const oclMat &y, oclMat &mag, oclMat
static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &dst1, oclMat &dst2, bool angleInDegrees,
string kernelName)
{
if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
}
Context *clCxt = src2.clCxt;
int channels = src2.oclchannels();
int depth = src2.depth();
@@ -1065,21 +1054,25 @@ static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &d
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&tmp ));
openCLExecuteKernel(clCxt, &arithm_polarToCart, kernelName, globalThreads, localThreads, args, -1, depth);
openCLExecuteKernel(src1.clCxt, &arithm_polarToCart, kernelName, globalThreads, localThreads, args, -1, depth);
}
void cv::ocl::polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees)
{
if (!magnitude.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && magnitude.depth() == CV_64F)
{
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}
CV_Assert(angle.depth() == CV_32F || angle.depth() == CV_64F);
CV_Assert(magnitude.size() == angle.size() && magnitude.type() == angle.type());
x.create(angle.size(), angle.type());
y.create(angle.size(), angle.type());
if ( magnitude.data )
{
CV_Assert( magnitude.size() == angle.size() && magnitude.type() == angle.type() );
arithmetic_ptc_run(magnitude, angle, x, y, angleInDegrees, "arithm_polarToCart_mag");
}
else
arithmetic_ptc_run(magnitude, angle, x, y, angleInDegrees, "arithm_polarToCart");
}
@@ -1211,7 +1204,7 @@ void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
{
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}
@@ -1269,7 +1262,8 @@ int cv::ocl::countNonZero(const oclMat &src)
Context *clCxt = src.clCxt;
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "selected device doesn't support double");
CV_Error(CV_OpenCLDoubleNotSupported, "selected device doesn't support double");
return -1;
}
size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits;
@@ -1302,8 +1296,6 @@ static void bitwise_unary_run(const oclMat &src1, oclMat &dst, string kernelName
{
dst.create(src1.size(), src1.type());
Context *clCxt = src1.clCxt;
int channels = dst.oclchannels();
int depth = dst.depth();
@@ -1332,7 +1324,7 @@ static void bitwise_unary_run(const oclMat &src1, oclMat &dst, string kernelName
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth);
openCLExecuteKernel(src1.clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth);
}
enum { AND = 0, OR, XOR };
@@ -1340,13 +1332,6 @@ enum { AND = 0, OR, XOR };
static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Scalar& src3, const oclMat &mask,
oclMat &dst, int operationType)
{
Context *clCxt = src1.clCxt;
if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F)
{
cout << "Selected device does not support double" << endl;
return;
}
CV_Assert(operationType >= AND && operationType <= XOR);
CV_Assert(src2.empty() || (!src2.empty() && src1.type() == src2.type() && src1.size() == src2.size()));
CV_Assert(mask.empty() || (!mask.empty() && mask.type() == CV_8UC1 && mask.size() == src1.size()));
@@ -1405,7 +1390,7 @@ static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Sca
args.push_back( make_pair( sizeof(cl_int), (void *)&cols1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows ));
openCLExecuteKernel(clCxt, mask.empty() ? (!src2.empty() ? &arithm_bitwise_binary : &arithm_bitwise_binary_scalar) :
openCLExecuteKernel(src1.clCxt, mask.empty() ? (!src2.empty() ? &arithm_bitwise_binary : &arithm_bitwise_binary_scalar) :
(!src2.empty() ? &arithm_bitwise_binary_mask : &arithm_bitwise_binary_scalar_mask),
kernelName, globalThreads, localThreads,
args, -1, -1, buildOptions.c_str());
@@ -1413,15 +1398,14 @@ static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Sca
void cv::ocl::bitwise_not(const oclMat &src, oclMat &dst)
{
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F)
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
cout << "Selected device does not support double" << endl;
CV_Error(CV_OpenCLDoubleNotSupported, "selected device doesn't support double");
return;
}
dst.create(src.size(), src.type());
string kernelName = "arithm_bitwise_not";
bitwise_unary_run(src, dst, kernelName, &arithm_bitwise_not);
bitwise_unary_run(src, dst, "arithm_bitwise_not", &arithm_bitwise_not);
}
void cv::ocl::bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
@@ -1541,13 +1525,6 @@ oclMatExpr::operator oclMat() const
static void transpose_run(const oclMat &src, oclMat &dst, string kernelName, bool inplace = false)
{
Context *clCxt = src.clCxt;
if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
}
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
const char channelsString[] = { ' ', ' ', '2', '4', '4' };
std::string buildOptions = format("-D T=%s%c", typeMap[src.depth()],
@@ -1569,13 +1546,17 @@ static void transpose_run(const oclMat &src, oclMat &dst, string kernelName, boo
args.push_back( make_pair( sizeof(cl_int), (void *)&srcoffset1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dstoffset1 ));
openCLExecuteKernel(clCxt, &arithm_transpose, kernelName, globalThreads, localThreads,
openCLExecuteKernel(src.clCxt, &arithm_transpose, kernelName, globalThreads, localThreads,
args, -1, -1, buildOptions.c_str());
}
void cv::ocl::transpose(const oclMat &src, oclMat &dst)
{
CV_Assert(src.depth() <= CV_64F && src.channels() <= 4);
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}
if ( src.data == dst.data && src.cols == src.rows && dst.offset == src.offset
&& dst.size() == src.size())
@@ -1597,7 +1578,7 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2,
bool hasDouble = clCxt->supportsFeature(FEATURE_CL_DOUBLE);
if (!hasDouble && src1.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}
@@ -1661,10 +1642,6 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2,
static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source)
{
CV_Assert(src1.cols == dst.cols && src1.rows == dst.rows);
CV_Assert(src1.type() == dst.type());
Context *clCxt = src1.clCxt;
int channels = dst.oclchannels();
int depth = dst.depth();
@@ -1694,22 +1671,21 @@ static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string
else
args.push_back( make_pair( sizeof(cl_double), (void *)&p ));
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth);
openCLExecuteKernel(src1.clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth);
}
void cv::ocl::pow(const oclMat &x, double p, oclMat &y)
{
if (!x.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && x.type() == CV_64F)
if (!x.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && x.depth() == CV_64F)
{
cout << "Selected device do not support double" << endl;
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}
CV_Assert(x.depth() == CV_32F || x.depth() == CV_64F);
y.create(x.size(), x.type());
string kernelName = "arithm_pow";
arithmetic_pow_run(x, p, y, kernelName, &arithm_pow);
arithmetic_pow_run(x, p, y, "arithm_pow", &arithm_pow);
}
//////////////////////////////////////////////////////////////////////////////
@@ -1718,10 +1694,9 @@ void cv::ocl::pow(const oclMat &x, double p, oclMat &y)
void cv::ocl::setIdentity(oclMat& src, const Scalar & scalar)
{
Context *clCxt = Context::getContext();
if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}
@@ -1745,6 +1720,6 @@ void cv::ocl::setIdentity(oclMat& src, const Scalar & scalar)
oclMat sc(1, 1, src.type(), scalar);
args.push_back( make_pair( sizeof(cl_mem), (void *)&sc.data ));
openCLExecuteKernel(clCxt, &arithm_setidentity, "setIdentity", global_threads, local_threads,
openCLExecuteKernel(src.clCxt, &arithm_setidentity, "setIdentity", global_threads, local_threads,
args, -1, -1, buildOptions.c_str());
}

View File

@@ -517,14 +517,14 @@ Context* Context::getContext()
{
if (initializeOpenCLDevices() == 0)
{
CV_Error(CV_GpuNotSupported, "OpenCL not available");
CV_Error(CV_OpenCLInitError, "OpenCL not available");
}
}
if (!__deviceSelected)
{
if (!selectOpenCLDevice())
{
CV_Error(CV_GpuNotSupported, "Can't select OpenCL device");
CV_Error(CV_OpenCLInitError, "Can't select OpenCL device");
}
}
}

View File

@@ -1417,7 +1417,7 @@ void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, d
{
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}

View File

@@ -977,7 +977,7 @@ namespace cv
CV_Assert(src.type() == CV_8UC1);
if(!src.clCxt->supportsFeature(ocl::FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
CV_Error(CV_OpenCLDoubleNotSupported, "select device don't support double");
return;
}
@@ -1168,7 +1168,7 @@ namespace cv
{
if(!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
CV_Error(CV_OpenCLDoubleNotSupported, "select device don't support double");
}
CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2);
CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
@@ -1187,7 +1187,7 @@ namespace cv
{
if(!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
CV_Error(CV_OpenCLDoubleNotSupported, "select device don't support double");
}
CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2);
CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
@@ -1301,10 +1301,11 @@ namespace cv
if( src.depth() != CV_8U || src.oclchannels() != 4 )
CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
// if(!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
// {
// CV_Error( CV_GpuNotSupported, "Selected device doesn't support double, so a deviation exists.\nIf the accuracy is acceptable, the error can be ignored.\n");
// }
// if(!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
// {
// CV_Error( CV_OpenCLDoubleNotSupported, "Selected device doesn't support double, so a deviation exists.\nIf the accuracy is acceptable, the error can be ignored.\n");
// return;
// }
dstr.create( src.size(), CV_8UC4 );
dstsp.create( src.size(), CV_16SC2 );

View File

@@ -164,7 +164,7 @@ void cv::ocl::distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src
{
//if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
//{
// CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
// CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
// return;
//}

View File

@@ -119,6 +119,12 @@ static void convert_C4C3(const oclMat &src, cl_mem &dst)
void cv::ocl::oclMat::upload(const Mat &m)
{
if (!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE) && m.depth() == CV_64F)
{
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}
CV_DbgAssert(!m.empty());
Size wholeSize;
Point ofs;
@@ -308,7 +314,7 @@ void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double be
if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) &&
(depth() == CV_64F || dst.depth() == CV_64F))
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}

View File

@@ -59,7 +59,7 @@ namespace cv
{
if(!mat_dst.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && mat_dst.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}
@@ -154,7 +154,7 @@ namespace cv
if(!mat_src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && mat_src.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}

View File

@@ -75,6 +75,7 @@ public:
void calc_non_rbf_base( int vec_count, const int row_idx, Qfloat* results, Mat& src);
void calc_rbf( int vec_count, const int row_idx, Qfloat* results, Mat& src);
};
class CvSVMSolver_ocl: public CvSVMSolver
{
public:
@@ -90,13 +91,16 @@ typedef struct CvSparseVecElem32f
int idx;
float val;
} CvSparseVecElem32f;
static int icvCmpSparseVecElems( const void* a, const void* b )
{
return ((CvSparseVecElem32f*)a)->idx - ((CvSparseVecElem32f*)b)->idx;
}
void cvPreparePredictData( const CvArr* sample, int dims_all, const CvMat* comp_idx,
int class_count, const CvMat* prob, float** row_sample,
int as_sparse CV_DEFAULT(0) );
void cvPreparePredictData( const CvArr* _sample, int dims_all,
const CvMat* comp_idx, int class_count,
const CvMat* prob, float** _row_sample,
@@ -135,9 +139,7 @@ void cvPreparePredictData( const CvArr* _sample, int dims_all,
}
if( d == 1 )
{
sizes[1] = 1;
}
if( sizes[0] + sizes[1] - 1 != dims_all )
CV_ERROR( CV_StsUnmatchedSizes,
@@ -184,25 +186,19 @@ void cvPreparePredictData( const CvArr* _sample, int dims_all,
sample_step = CV_IS_MAT_CONT(sample->type) ? 1 : sample->step / sizeof(row_sample[0]);
if( !comp_idx && CV_IS_MAT_CONT(sample->type) && !as_sparse )
{
*_row_sample = sample_data;
}
else
{
CV_CALL( row_sample = (float*)cvAlloc( vec_size ));
if( !comp_idx )
for( i = 0; i < dims_selected; i++ )
{
row_sample[i] = sample_data[sample_step * i];
}
else
{
int* comp = comp_idx->data.i;
for( i = 0; i < dims_selected; i++ )
{
row_sample[i] = sample_data[sample_step * comp[i]];
}
}
*_row_sample = row_sample;
@@ -236,9 +232,7 @@ void cvPreparePredictData( const CvArr* _sample, int dims_all,
CV_CALL( inverse_comp_idx = (int*)cvAlloc( dims_all * sizeof(int) ));
memset( inverse_comp_idx, -1, dims_all * sizeof(int) );
for( i = 0; i < dims_selected; i++ )
{
inverse_comp_idx[comp_idx->data.i[i]] = i;
}
}
if( !as_sparse )
@@ -252,9 +246,7 @@ void cvPreparePredictData( const CvArr* _sample, int dims_all,
{
idx = inverse_comp_idx[idx];
if( idx < 0 )
{
continue;
}
}
row_sample[idx] = *(float*)CV_NODE_VAL( sparse, node );
}
@@ -270,9 +262,7 @@ void cvPreparePredictData( const CvArr* _sample, int dims_all,
{
idx = inverse_comp_idx[idx];
if( idx < 0 )
{
continue;
}
}
ptr->idx = idx;
ptr->val = *(float*)CV_NODE_VAL( sparse, node );
@@ -290,9 +280,7 @@ void cvPreparePredictData( const CvArr* _sample, int dims_all,
__END__;
if( inverse_comp_idx )
{
cvFree( &inverse_comp_idx );
}
if( cvGetErrStatus() < 0 && _row_sample )
{
@@ -300,6 +288,7 @@ void cvPreparePredictData( const CvArr* _sample, int dims_all,
*_row_sample = 0;
}
}
float CvSVM_OCL::predict( const int row_index, int row_len, Mat& src, bool returnDFVal ) const
{
assert( kernel );
@@ -323,9 +312,7 @@ float CvSVM_OCL::predict( const int row_index, int row_len, Mat& src, bool retur
((CvSVMKernel_ocl*)kernel)->calc( sv_count, row_index, buffer, src);
for( i = 0; i < sv_count; i++ )
{
sum += buffer[i] * df->alpha[i];
}
result = params.svm_type == ONE_CLASS ? (float)(sum > 0) : (float)sum;
}
@@ -341,27 +328,20 @@ float CvSVM_OCL::predict( const int row_index, int row_len, Mat& src, bool retur
double sum = 0.;
for( i = 0; i < class_count; i++ )
{
for( j = i + 1; j < class_count; j++, df++ )
{
sum = -df->rho;
int sv_count = df->sv_count;
for( k = 0; k < sv_count; k++ )
{
sum += df->alpha[k] * buffer[df->sv_index[k]];
}
vote[sum > 0 ? i : j]++;
}
}
for( i = 1, k = 0; i < class_count; i++ )
{
if( vote[i] > vote[k] )
{
k = i;
}
}
result = returnDFVal && class_count == 2 ? (float)sum : (float)(class_labels->data.i[k]);
}
else
@@ -370,11 +350,13 @@ float CvSVM_OCL::predict( const int row_index, int row_len, Mat& src, bool retur
return result;
}
float CvSVM_OCL::predict( const Mat& _sample, bool returnDFVal ) const
{
CvMat sample = _sample;
return CvSVM::predict(&sample, returnDFVal);
}
float CvSVM_OCL::predict( const int row_index, Mat& src, bool returnDFVal) const
{
float result = 0;
@@ -383,6 +365,7 @@ float CvSVM_OCL::predict( const int row_index, Mat& src, bool returnDFVal) const
return result;
}
#undef get_C
#define get_C(i) (C[y[i]>0])
#undef is_upper_bound
@@ -397,12 +380,14 @@ CvSVMSolver_ocl::CvSVMSolver_ocl(const CvSVMParams* _params)
{
params = _params;
}
// Produces row i for the solver.
// get_row_base supplies the underlying row and reports through the flag
// whether that row already existed; both are then handed, together with the
// caller's dst buffer, to the member function currently selected by
// get_row_func, whose return value is the result of this call.
float* CvSVMSolver_ocl::get_row( int i, float* dst, Mat& src )
{
    bool rowExisted = false;
    float* const baseRow = get_row_base( i, &rowExisted, src );

    return (this->*get_row_func)( i, baseRow, dst, rowExisted );
}
float* CvSVMSolver_ocl::get_row_base( int i, bool* _existed, Mat& src )
{
int i1 = i < sample_count ? i : i - sample_count;
@@ -434,19 +419,16 @@ float* CvSVMSolver_ocl::get_row_base( int i, bool* _existed, Mat& src )
row->prev->next = row->next->prev = row;
if( !existed )
{
((CvSVMKernel_ocl*)kernel)->calc( sample_count, i1, row->data, src);
}
if( _existed )
{
*_existed = existed;
}
return row->data;
}
#ifndef HAVE_CLAMDBLAS
static void matmul_sigmod(oclMat & src, oclMat & src2, oclMat & dst, int src_rows, int src2_cols, int var_count, double alpha1, double beta1)
{
Context *clCxt = Context::getContext();
@@ -486,6 +468,7 @@ static void matmul_sigmod(oclMat & src, oclMat & src2, oclMat & dst, int src_row
}
openCLExecuteKernel(clCxt, &svm, kernelName, globalThreads, localThreads, args, -1, -1);
}
static void matmul_poly(oclMat & src, oclMat & src2, oclMat & dst, int src_rows, int src2_cols, int var_count, double alpha1, double beta1, double degree1, bool flag)
{
Context *clCxt = Context::getContext();
@@ -534,6 +517,7 @@ static void matmul_poly(oclMat & src, oclMat & src2, oclMat & dst, int src_rows,
}
openCLExecuteKernel(clCxt, &svm, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
}
static void matmul_linear(oclMat & src, oclMat & src2, oclMat & dst, int src_rows, int src2_cols, int var_count, double alpha1, double beta1)
{
Context *clCxt = Context::getContext();
@@ -573,6 +557,7 @@ static void matmul_linear(oclMat & src, oclMat & src2, oclMat & dst, int src_row
}
openCLExecuteKernel(clCxt, &svm, kernelName, globalThreads, localThreads, args, -1, -1);
}
#endif // #ifndef HAVE_CLAMDBLAS
static void matmul_rbf(oclMat& src, oclMat& src_e, oclMat& dst, int src_rows, int src2_cols, int var_count, double gamma1, bool flag)
@@ -594,9 +579,8 @@ static void matmul_rbf(oclMat& src, oclMat& src_e, oclMat& dst, int src_rows, in
char build_options[50];
if(flag)
{
sprintf(build_options, "-D ADDEXP");
}
vector< pair<size_t, const void *> > args;
args.push_back(make_pair(sizeof(cl_mem), (void* )&src.data));
args.push_back(make_pair(sizeof(cl_int), (void* )&src_step));
@@ -614,9 +598,7 @@ static void matmul_rbf(oclMat& src, oclMat& src_e, oclMat& dst, int src_rows, in
args.push_back(make_pair(sizeof(cl_float), (void* )&gamma));
}
else
{
args.push_back(make_pair(sizeof(cl_double), (void* )&gamma1));
}
openCLExecuteKernel(clCxt, &svm, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
}
@@ -649,14 +631,12 @@ float CvSVM_OCL::predict(const CvMat* samples, CV_OUT CvMat* results) const
CV_CALL( cvPreparePredictData(&sample, var_all, var_idx,
class_count, 0, &row_sample ));
for(int j = 0; j < var_count; ++j)
{
src_temp.at<float>(i, j) = row_sample[j];
}
__END__;
}
Mat dst1;
double alpha1 = 0.0, beta1 = 0.0, gamma1 = 0.0, degree1 = 0.0;
double alpha1 = 0.0, beta1 = 0.0, gamma1 = 0.0;
if(params.kernel_type == CvSVM::LINEAR)
{
alpha1 = 1;
@@ -666,7 +646,6 @@ float CvSVM_OCL::predict(const CvMat* samples, CV_OUT CvMat* results) const
{
alpha1 = params.gamma;
beta1 = params.coef0;
degree1 = params.degree;
}
if(params.kernel_type == CvSVM::SIGMOID)
{
@@ -674,27 +653,22 @@ float CvSVM_OCL::predict(const CvMat* samples, CV_OUT CvMat* results) const
beta1 = - 2 * params.coef0;
}
if(params.kernel_type == CvSVM::RBF)
{
gamma1 = - params.gamma;
}
Mat sv_temp = Mat(sv_total, var_count, CV_32FC1, Scalar::all(0));
for(int i = 0; i < sv_total; ++i)
{
for(int j = 0; j < var_count; ++j)
{
sv_temp.at<float>(i, j) = sv[i][j];
}
}
oclMat src(sample_count, var_count, CV_32FC1, Scalar::all(0));
oclMat sv_;
src.upload(src_temp);
oclMat dst;
#if defined HAVE_CLAMDBLAS
#ifdef HAVE_CLAMDBLAS
dst = oclMat(sample_count, sv_total, CV_32FC1);
oclMat src3(sample_count, sv_total, CV_32FC1, Scalar::all(1));
@@ -707,15 +681,15 @@ float CvSVM_OCL::predict(const CvMat* samples, CV_OUT CvMat* results) const
}
#else
double degree1 = 0.0;
if (params.kernel_type == CvSVM::POLY)
degree1 = params.degree;
if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
{
dst = oclMat(sample_count, sv_total, CV_32FC1);
}
else
{
dst = oclMat(sample_count, sv_total, CV_64FC1);
}
if(params.kernel_type == CvSVM::LINEAR)
{
sv_.upload(sv_temp);
@@ -731,13 +705,9 @@ float CvSVM_OCL::predict(const CvMat* samples, CV_OUT CvMat* results) const
{
sv_.upload(sv_temp);
if(sample_count > 0)
{
matmul_poly(src, sv_, dst, sample_count, sv_total, var_count, alpha1, beta1, degree1, true);
}
else
{
matmul_poly(src, sv_, dst, sample_count, sv_total, var_count, alpha1, beta1, degree1, false);
}
}
#endif
@@ -745,21 +715,14 @@ float CvSVM_OCL::predict(const CvMat* samples, CV_OUT CvMat* results) const
{
sv_.upload(sv_temp);
if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
{
dst = oclMat(sample_count, sv_total, CV_32FC1);
}
else
{
dst = oclMat(sample_count, sv_total, CV_64FC1);
}
if(sample_count > 0)
{
matmul_rbf(src, sv_, dst, sample_count, sv_total, var_count, gamma1, true);
}
else
{
matmul_rbf(src, sv_, dst, sample_count, sv_total, var_count, gamma1, false);
}
}
dst.download(dst1);
@@ -768,22 +731,20 @@ float CvSVM_OCL::predict(const CvMat* samples, CV_OUT CvMat* results) const
{
int r = (int)this->predict(i, dst1);
if (results)
{
results->data.fl[i] = (float)r;
}
if (i == 0)
{
result = (float)r;
}
}
return result;
}
// Batch prediction on InputArray/OutputArray arguments.
// _results is (re)allocated as a CV_32F column with one row per row of
// _samples, both arrays are converted to CvMat, and the work is delegated to
// the predict overload that takes CvMat pointers.
void CvSVM_OCL::predict( cv::InputArray _samples, cv::OutputArray _results ) const
{
    const int sampleRows = _samples.size().height;
    _results.create( sampleRows, 1, CV_32F );

    CvMat samplesHeader = _samples.getMat();
    CvMat resultsHeader = _results.getMat();

    predict( &samplesHeader, &resultsHeader );
}
bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si )
{
int iter = 0;
@@ -800,7 +761,7 @@ bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si )
}
}
Mat dst1;
double alpha1 = 0.0, beta1 = 0.0, gamma1 = 0.0, degree1 = 0.0;
double alpha1 = 0.0, beta1 = 0.0, gamma1 = 0.0;
if(params->kernel_type == CvSVM::LINEAR)
{
alpha1 = 1;
@@ -810,7 +771,6 @@ bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si )
{
alpha1 = params->gamma;
beta1 = params->coef0;
degree1 = params->degree;
}
if(params->kernel_type == CvSVM::SIGMOID)
{
@@ -834,7 +794,7 @@ bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si )
src.upload(src1);
oclMat dst;
#if defined HAVE_CLAMDBLAS
#ifdef HAVE_CLAMDBLAS
dst = oclMat(sample_count, sample_count, CV_32FC1);
oclMat src3(sample_count, sample_count, CV_32FC1, Scalar::all(1));
@@ -845,14 +805,15 @@ bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si )
}
#else
double degree1 = 0.0;
if(params->kernel_type == CvSVM::POLY)
degree1 = params->degree;
if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
{
dst = oclMat(sample_count, sample_count, CV_32FC1);
}
else
{
dst = oclMat(sample_count, sample_count, CV_64FC1);
}
if(params->kernel_type == CvSVM::LINEAR )
{
src_e = src;
@@ -868,13 +829,9 @@ bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si )
{
src_e = src;
if(sample_count > 0)
{
matmul_poly(src, src_e, dst, sample_count, sample_count, var_count, alpha1, beta1, degree1, true);
}
else
{
matmul_poly(src, src_e, dst, sample_count, sample_count, var_count, alpha1, beta1, degree1, false);
}
}
#endif
@@ -883,21 +840,14 @@ bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si )
{
src_e = src;
if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
{
dst = oclMat(sample_count, sample_count, CV_32FC1);
}
else
{
dst = oclMat(sample_count, sample_count, CV_64FC1);
}
if(sample_count > 0)
{
matmul_rbf(src, src_e, dst, sample_count, sample_count, var_count, gamma1, true);
}
else
{
matmul_rbf(src, src_e, dst, sample_count, sample_count, var_count, gamma1, false);
}
}
dst.download(dst1);
for( i = 0; i < alpha_count; i++ )
@@ -908,9 +858,7 @@ bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si )
double alpha_i = alpha[i];
for( j = 0; j < alpha_count; j++ )
{
G[j] += alpha_i * Q_i[j];
}
}
}
@@ -926,14 +874,10 @@ bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si )
for( i = 0; i < alpha_count; i++ )
{
if( fabs(G[i]) > 1e+300 )
{
return false;
}
if( fabs(alpha[i]) > 1e16 )
{
return false;
}
}
#endif
@@ -1021,9 +965,7 @@ bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si )
delta_alpha_j = alpha_j - old_alpha_j;
for( k = 0; k < alpha_count; k++ )
{
G[k] += Q_i[k] * delta_alpha_i + Q_j[k] * delta_alpha_j;
}
}
// calculate rho
@@ -1031,9 +973,7 @@ bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si )
// calculate objective value
for( i = 0, si.obj = 0; i < alpha_count; i++ )
{
si.obj += alpha[i] * (G[i] + b[i]);
}
si.obj *= 0.5;
@@ -1053,14 +993,11 @@ void CvSVMKernel_ocl::calc( int vcount, const int row_idx, Qfloat* results, Mat&
const Qfloat max_val = (Qfloat)(FLT_MAX * 1e-3);
int j;
for( j = 0; j < vcount; j++ )
{
if( results[j] > max_val )
{
results[j] = max_val;
}
}
// FIXIT #endif
}
bool CvSVMKernel_ocl::create( const CvSVMParams* _params, Calc_ocl _calc_func, Calc _calc_func1 )
{
clear();
@@ -1084,9 +1021,10 @@ CvSVMKernel_ocl::CvSVMKernel_ocl(const CvSVMParams* params, CvSVMKernel_ocl::Cal
CvSVMKernel::clear();
CvSVMKernel_ocl::create( params, _calc_func, _calc_func1 );
}
void CvSVMKernel_ocl::calc_non_rbf_base( int vcount, const int row_idx, Qfloat* results, Mat& src)
{
#if defined HAVE_CLAMDBLAS
#ifdef HAVE_CLAMDBLAS
for(int i = 0; i < vcount; i++)
{
@@ -1109,23 +1047,17 @@ void CvSVMKernel_ocl::calc_non_rbf_base( int vcount, const int row_idx, Qfloat*
}
#endif
}
void CvSVMKernel_ocl::calc_rbf( int vcount, const int row_idx, Qfloat* results, Mat& src)
{
if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
{
for(int m = 0; m < vcount; m++)
{
results[m] = (Qfloat) * src.ptr<float>(row_idx, m);
}
}
else
{
for(int m = 0; m < vcount; m++)
{
results[m] = (Qfloat) * src.ptr<double>(row_idx, m);
}
}
}
void CvSVMKernel_ocl::calc_linear( int vcount, const int row_idx, Qfloat* results, Mat& src )
{
calc_non_rbf_base( vcount, row_idx, results, src);
@@ -1133,16 +1065,13 @@ void CvSVMKernel_ocl::calc_linear( int vcount, const int row_idx, Qfloat* result
void CvSVMKernel_ocl::calc_poly( int vcount, const int row_idx, Qfloat* results, Mat& src)
{
calc_non_rbf_base( vcount, row_idx, results, src);
//FIXIT #if defined HAVE_CLAMDBLAS
CvMat R = cvMat( 1, vcount, QFLOAT_TYPE, results );
if( vcount > 0 )
{
cvPow( &R, &R, params->degree );
}
//FIXIT #endif
}
@@ -1157,16 +1086,13 @@ void CvSVMKernel_ocl::calc_sigmoid( int vcount, const int row_idx, Qfloat* resul
Qfloat t = results[j];
double e = ::exp(-fabs(t));
if( t > 0 )
{
results[j] = (Qfloat)((1. - e) / (1. + e));
}
else
{
results[j] = (Qfloat)((e - 1.) / (e + 1.));
}
}
//FIXIT #endif
}
CvSVM_OCL::CvSVM_OCL()
{
CvSVM();
@@ -1191,6 +1117,7 @@ void CvSVM_OCL::create_kernel()
{
kernel = new CvSVMKernel_ocl(&params, 0, 0);
}
void CvSVM_OCL::create_solver( )
{
solver = new CvSVMSolver_ocl(&params);