refactored and extended binary bitwise operations

This commit is contained in:
Ilya Lavrenov
2013-09-24 13:54:46 +04:00
parent 161674bff2
commit 8e0e352d77
5 changed files with 119 additions and 2578 deletions

View File

@@ -1290,7 +1290,8 @@ int cv::ocl::countNonZero(const oclMat &src)
//////////////////////////////////////////////////////////////////////////////
////////////////////////////////bitwise_op////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
static void bitwise_run(const oclMat &src1, oclMat &dst, string kernelName, const char **kernelString)
static void bitwise_unary_run(const oclMat &src1, oclMat &dst, string kernelName, const char **kernelString)
{
dst.create(src1.size(), src1.type());
@@ -1327,331 +1328,123 @@ static void bitwise_run(const oclMat &src1, oclMat &dst, string kernelName, cons
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
}
enum { AND = 0, OR, XOR };
template<typename T>
void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName,
const char **kernelString, void *_scalar, const char* _opt = NULL)
static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Scalar& src3, const oclMat &mask,
oclMat &dst, int operationType)
{
dst.create(src1.size(), src1.type());
CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols &&
src1.rows == src2.rows && src2.rows == dst.rows);
CV_Assert(src1.type() == src2.type() && src1.type() == dst.type());
Context *clCxt = src1.clCxt;
int channels = dst.oclchannels();
int depth = dst.depth();
if (!clCxt->supportsFeature(Context::CL_DOUBLE) && src1.depth() == CV_64F)
{
cout << "Selected device does not support double" << endl;
return;
}
int vector_lengths[4][7] = {{4, 4, 4, 4, 1, 1, 1},
{4, 4, 4, 4, 1, 1, 1},
{4, 4, 4, 4, 1, 1, 1},
{4, 4, 4, 4, 1, 1, 1}
};
CV_Assert(operationType >= AND && operationType <= XOR);
CV_Assert(src2.empty() || (!src2.empty() && src1.type() == src2.type() && src1.size() == src2.size()));
CV_Assert(mask.empty() || (!mask.empty() && mask.type() == CV_8UC1 && mask.size() == src1.size()));
size_t vector_length = vector_lengths[channels - 1][depth];
int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1);
int cols = divUp(dst.cols * channels + offset_cols, vector_length);
dst.create(src1.size(), src1.type());
size_t localThreads[3] = { 64, 4, 1 };
size_t globalThreads[3] = { cols, dst.rows, 1 };
int elemSize = dst.elemSize();
int cols1 = dst.cols * elemSize;
oclMat m;
const char operationMap[] = { '&', '|', '^' };
std::string kernelName("arithm_bitwise_binary");
std::string buildOptions = format("-D Operation=%c", operationMap[operationType]);
size_t localThreads[3] = { 16, 16, 1 };
size_t globalThreads[3] = { cols1, dst.rows, 1 };
int dst_step1 = dst.cols * dst.elemSize();
vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src2.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset ));
if (src2.empty())
{
m.create(1, 1, dst.type());
m.setTo(src3);
args.push_back( make_pair( sizeof(cl_mem), (void *)&m.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&elemSize ) );
kernelName += "_scalar";
}
else
{
args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src2.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset ));
}
if (!mask.empty())
{
args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&mask.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&mask.offset ));
if (!src2.empty())
args.push_back( make_pair( sizeof(cl_int), (void *)&elemSize ));
kernelName += "_mask";
}
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset ));
args.push_back( make_pair( sizeof(cl_int), (void *)&cols1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
T scalar;
if(_scalar != NULL)
{
double scalar1 = *((double *)_scalar);
scalar = (T)scalar1;
args.push_back( make_pair( sizeof(T), (void *)&scalar ));
}
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth, _opt);
}
static void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst,
string kernelName, const char **kernelString, const char* _opt = NULL)
{
bitwise_run<char>(src1, src2, dst, kernelName, kernelString, (void *)NULL, _opt);
}
static void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst,
const oclMat &mask, string kernelName, const char **kernelString, const char* _opt = NULL)
{
dst.create(src1.size(), src1.type());
CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols &&
src1.rows == src2.rows && src2.rows == dst.rows &&
src1.rows == mask.rows && src1.cols == mask.cols);
CV_Assert(src1.type() == src2.type() && src1.type() == dst.type());
CV_Assert(mask.type() == CV_8U);
Context *clCxt = src1.clCxt;
int channels = dst.oclchannels();
int depth = dst.depth();
int vector_lengths[4][7] = {{4, 4, 2, 2, 1, 1, 1},
{2, 2, 1, 1, 1, 1, 1},
{4, 4, 2, 2 , 1, 1, 1},
{1, 1, 1, 1, 1, 1, 1}
};
size_t vector_length = vector_lengths[channels - 1][depth];
int offset_cols = ((dst.offset % dst.step) / dst.elemSize()) & (vector_length - 1);
int cols = divUp(dst.cols + offset_cols, vector_length);
size_t localThreads[3] = { 64, 4, 1 };
size_t globalThreads[3] = { cols, dst.rows, 1 };
int dst_step1 = dst.cols * dst.elemSize();
vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src2.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&mask.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&mask.offset ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, channels, depth, _opt);
}
template <typename WT , typename CL_WT>
void bitwise_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst,
const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar, const char* opt = NULL)
{
dst.create(src1.size(), src1.type());
CV_Assert(src1.cols == dst.cols && src1.rows == dst.rows &&
src1.type() == dst.type());
if(mask.data)
{
CV_Assert(mask.type() == CV_8U && src1.rows == mask.rows && src1.cols == mask.cols);
}
Context *clCxt = src1.clCxt;
int channels = dst.oclchannels();
int depth = dst.depth();
WT s[4] = { saturate_cast<WT>(src2.val[0]), saturate_cast<WT>(src2.val[1]),
saturate_cast<WT>(src2.val[2]), saturate_cast<WT>(src2.val[3])
};
int vector_lengths[4][7] = {{4, 4, 2, 2, 1, 1, 1},
{2, 2, 1, 1, 1, 1, 1},
{4, 4, 2, 2 , 1, 1, 1},
{1, 1, 1, 1, 1, 1, 1}
};
size_t vector_length = vector_lengths[channels - 1][depth];
int offset_cols = ((dst.offset % dst.step) / dst.elemSize()) & (vector_length - 1);
int cols = divUp(dst.cols + offset_cols, vector_length);
size_t localThreads[3] = { 64, 4, 1 };
size_t globalThreads[3] = { cols, dst.rows, 1 };
int dst_step1 = dst.cols * dst.elemSize();
vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_mem) , (void *)&src1.data ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&src1.step ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&src1.offset));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.step ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.offset));
if(mask.data)
{
args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset));
}
args.push_back( make_pair( sizeof(CL_WT) , (void *)&s ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&src1.rows ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step1 ));
if(isMatSubScalar != 0)
{
isMatSubScalar = isMatSubScalar > 0 ? 1 : 0;
args.push_back( make_pair( sizeof(cl_int) , (void *)&isMatSubScalar));
}
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, channels, depth, opt);
}
typedef void (*BitwiseFuncS)(const oclMat &src1, const Scalar &src2, oclMat &dst,
const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar, const char* opt);
static void bitwise_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst,
const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar, const char* opt)
{
static BitwiseFuncS tab[8] =
{
#if 0
bitwise_scalar_run<unsigned char>,
bitwise_scalar_run<char>,
bitwise_scalar_run<unsigned short>,
bitwise_scalar_run<short>,
bitwise_scalar_run<int>,
bitwise_scalar_run<float>,
bitwise_scalar_run<double>,
0
#else
bitwise_scalar_run<unsigned char, cl_uchar4>,
bitwise_scalar_run<char, cl_char4>,
bitwise_scalar_run<unsigned short, cl_ushort4>,
bitwise_scalar_run<short, cl_short4>,
bitwise_scalar_run<int, cl_int4>,
bitwise_scalar_run<float, cl_float4>,
bitwise_scalar_run<double, cl_double4>,
0
#endif
};
BitwiseFuncS func = tab[src1.depth()];
if(func == 0)
cv::ocl::error("Unsupported arithmetic operation", __FILE__, __LINE__);
func(src1, src2, dst, mask, kernelName, kernelString, isMatSubScalar, opt);
}
static void bitwise_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst,
const oclMat &mask, string kernelName, const char **kernelString, const char * opt = NULL)
{
bitwise_scalar(src1, src2, dst, mask, kernelName, kernelString, 0, opt);
openCLExecuteKernel(clCxt, mask.empty() ? (!src2.empty() ? &arithm_bitwise_binary : &arithm_bitwise_binary_scalar) :
(!src2.empty() ? &arithm_bitwise_binary_mask : &arithm_bitwise_binary_scalar_mask),
kernelName, globalThreads, localThreads,
args, -1, -1, buildOptions.c_str());
}
void cv::ocl::bitwise_not(const oclMat &src, oclMat &dst)
{
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
cout << "Selected device does not support double" << endl;
return;
}
dst.create(src.size(), src.type());
string kernelName = "arithm_bitwise_not";
bitwise_run(src, dst, kernelName, &arithm_bitwise_not);
bitwise_unary_run(src, dst, kernelName, &arithm_bitwise_not);
}
void cv::ocl::bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
{
// dst.create(src1.size(),src1.type());
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
}
string kernelName = mask.empty() ? "arithm_bitwise_binary" : "arithm_bitwise_binary_with_mask";
static const char opt [] = "-D OP_BINARY=|";
if (mask.empty())
bitwise_run(src1, src2, dst, kernelName, &arithm_bitwise_binary, opt);
else
bitwise_run(src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_mask, opt);
bitwise_binary_run(src1, src2, Scalar(), mask, dst, OR);
}
void cv::ocl::bitwise_or(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask)
{
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
}
static const char opt [] = "-D OP_BINARY=|";
string kernelName = mask.data ? "arithm_s_bitwise_binary_with_mask" : "arithm_s_bitwise_binary";
if (mask.data)
bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_scalar_mask, opt);
else
bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_scalar, opt);
bitwise_binary_run(src1, oclMat(), src2, mask, dst, OR);
}
void cv::ocl::bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
{
// dst.create(src1.size(),src1.type());
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
}
oclMat emptyMat;
string kernelName = mask.empty() ? "arithm_bitwise_binary" : "arithm_bitwise_binary_with_mask";
static const char opt [] = "-D OP_BINARY=&";
if (mask.empty())
bitwise_run(src1, src2, dst, kernelName, &arithm_bitwise_binary, opt);
else
bitwise_run(src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_mask, opt);
bitwise_binary_run(src1, src2, Scalar(), mask, dst, AND);
}
void cv::ocl::bitwise_and(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask)
{
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
}
static const char opt [] = "-D OP_BINARY=&";
string kernelName = mask.data ? "arithm_s_bitwise_binary_with_mask" : "arithm_s_bitwise_binary";
if (mask.data)
bitwise_scalar(src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_scalar_mask, opt);
else
bitwise_scalar(src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_scalar, opt);
bitwise_binary_run(src1, oclMat(), src2, mask, dst, AND);
}
void cv::ocl::bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
{
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
}
string kernelName = mask.empty() ? "arithm_bitwise_binary" : "arithm_bitwise_binary_with_mask";
static const char opt [] = "-D OP_BINARY=^";
if (mask.empty())
bitwise_run(src1, src2, dst, kernelName, &arithm_bitwise_binary, opt);
else
bitwise_run(src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_mask, opt);
bitwise_binary_run(src1, src2, Scalar(), mask, dst, XOR);
}
void cv::ocl::bitwise_xor(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask)
{
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
}
string kernelName = mask.data ? "arithm_s_bitwise_binary_with_mask" : "arithm_s_bitwise_binary";
static const char opt [] = "-D OP_BINARY=^";
if (mask.data)
bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_scalar_mask, opt);
else
bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_scalar, opt);
bitwise_binary_run(src1, oclMat(), src2, mask, dst, XOR);
}
oclMat cv::ocl::operator ~ (const oclMat &src)