Merge pull request #1482 from ilya-lavrenov:ocl_setTO

This commit is contained in:
Alexander Smorkalov 2013-09-23 12:56:19 +04:00 committed by OpenCV Buildbot
commit 0cd3d1f4d6
5 changed files with 163 additions and 506 deletions

View File

@ -445,200 +445,62 @@ void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double be
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
//////////////////////////////// setTo //////////////////////////////////// //////////////////////////////// setTo ////////////////////////////////////
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
oclMat &cv::ocl::oclMat::operator = (const Scalar &s) oclMat &cv::ocl::oclMat::operator = (const Scalar &s)
{ {
setTo(s); setTo(s);
return *this; return *this;
} }
static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, string kernelName) static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, string kernelName)
{ {
vector<pair<size_t , const void *> > args; vector<pair<size_t , const void *> > args;
size_t localThreads[3] = {16, 16, 1}; size_t localThreads[3] = {16, 16, 1};
size_t globalThreads[3]; size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1];
globalThreads[2] = 1;
int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize(); int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize();
if(dst.type() == CV_8UC1)
{ if (dst.type() == CV_8UC1)
globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0]; globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0];
}
char compile_option[32]; const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
union sc const char channelMap[] = { ' ', ' ', '2', '4', '4' };
{ std::string buildOptions = format("-D GENTYPE=%s%c", typeMap[dst.depth()], channelMap[dst.channels()]);
cl_uchar4 uval;
cl_char4 cval; Mat mat(1, 1, dst.type(), scalar);
cl_ushort4 usval;
cl_short4 shval;
cl_int4 ival;
cl_float4 fval;
cl_double4 dval;
} val;
switch(dst.depth())
{
case CV_8U:
val.uval.s[0] = saturate_cast<uchar>(scalar.val[0]);
val.uval.s[1] = saturate_cast<uchar>(scalar.val[1]);
val.uval.s[2] = saturate_cast<uchar>(scalar.val[2]);
val.uval.s[3] = saturate_cast<uchar>(scalar.val[3]);
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=uchar");
args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] ));
break;
case 4:
sprintf(compile_option, "-D GENTYPE=uchar4");
args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval ));
break;
default:
CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
}
break;
case CV_8S:
val.cval.s[0] = saturate_cast<char>(scalar.val[0]);
val.cval.s[1] = saturate_cast<char>(scalar.val[1]);
val.cval.s[2] = saturate_cast<char>(scalar.val[2]);
val.cval.s[3] = saturate_cast<char>(scalar.val[3]);
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=char");
args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] ));
break;
case 4:
sprintf(compile_option, "-D GENTYPE=char4");
args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval ));
break;
default:
CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
}
break;
case CV_16U:
val.usval.s[0] = saturate_cast<ushort>(scalar.val[0]);
val.usval.s[1] = saturate_cast<ushort>(scalar.val[1]);
val.usval.s[2] = saturate_cast<ushort>(scalar.val[2]);
val.usval.s[3] = saturate_cast<ushort>(scalar.val[3]);
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=ushort");
args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] ));
break;
case 4:
sprintf(compile_option, "-D GENTYPE=ushort4");
args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval ));
break;
default:
CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
}
break;
case CV_16S:
val.shval.s[0] = saturate_cast<short>(scalar.val[0]);
val.shval.s[1] = saturate_cast<short>(scalar.val[1]);
val.shval.s[2] = saturate_cast<short>(scalar.val[2]);
val.shval.s[3] = saturate_cast<short>(scalar.val[3]);
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=short");
args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] ));
break;
case 4:
sprintf(compile_option, "-D GENTYPE=short4");
args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval ));
break;
default:
CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
}
break;
case CV_32S:
val.ival.s[0] = saturate_cast<int>(scalar.val[0]);
val.ival.s[1] = saturate_cast<int>(scalar.val[1]);
val.ival.s[2] = saturate_cast<int>(scalar.val[2]);
val.ival.s[3] = saturate_cast<int>(scalar.val[3]);
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=int");
args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] ));
break;
case 2:
sprintf(compile_option, "-D GENTYPE=int2");
cl_int2 i2val;
i2val.s[0] = val.ival.s[0];
i2val.s[1] = val.ival.s[1];
args.push_back( make_pair( sizeof(cl_int2) , (void *)&i2val ));
break;
case 4:
sprintf(compile_option, "-D GENTYPE=int4");
args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival ));
break;
default:
CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
}
break;
case CV_32F:
val.fval.s[0] = scalar.val[0];
val.fval.s[1] = scalar.val[1];
val.fval.s[2] = scalar.val[2];
val.fval.s[3] = scalar.val[3];
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=float");
args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] ));
break;
case 4:
sprintf(compile_option, "-D GENTYPE=float4");
args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval ));
break;
default:
CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
}
break;
case CV_64F:
val.dval.s[0] = scalar.val[0];
val.dval.s[1] = scalar.val[1];
val.dval.s[2] = scalar.val[2];
val.dval.s[3] = scalar.val[3];
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=double");
args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] ));
break;
case 4:
sprintf(compile_option, "-D GENTYPE=double4");
args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval ));
break;
default:
CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
}
break;
default:
CV_Error(CV_StsUnsupportedFormat, "unknown depth");
}
#ifdef CL_VERSION_1_2 #ifdef CL_VERSION_1_2
//this enables backwards portability to // this enables backwards portability to
//run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
if(Context::getContext()->supportsFeature(Context::CL_VER_1_2) && if (Context::getContext()->supportsFeature(Context::CL_VER_1_2) &&
dst.offset == 0 && dst.cols == dst.wholecols) dst.offset == 0 && dst.cols == dst.wholecols)
{ {
const int sizeofMap[][7] =
{
{ sizeof(cl_uchar) , sizeof(cl_char) , sizeof(cl_ushort) , sizeof(cl_short) , sizeof(cl_int) , sizeof(cl_float) , sizeof(cl_double) },
{ sizeof(cl_uchar2), sizeof(cl_char2), sizeof(cl_ushort2), sizeof(cl_short2), sizeof(cl_int2), sizeof(cl_float2), sizeof(cl_double2) },
{ 0 , 0 , 0 , 0 , 0 , 0 , 0 },
{ sizeof(cl_uchar4), sizeof(cl_char4), sizeof(cl_ushort4), sizeof(cl_short4), sizeof(cl_int4), sizeof(cl_float4), sizeof(cl_double4) },
};
int sizeofGeneric = sizeofMap[dst.oclchannels() - 1][dst.depth()];
clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(), clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(),
(cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL); (cl_mem)dst.data, (void*)mat.data, sizeofGeneric,
0, dst.step * dst.rows, 0, NULL, NULL);
} }
else else
#endif #endif
{ {
oclMat m(mat);
args.push_back( make_pair( sizeof(cl_mem) , (void*)&m.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data )); args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel )); args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel)); args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel ));
openCLExecuteKernel(dst.clCxt , &operator_setTo, kernelName, globalThreads, openCLExecuteKernel(dst.clCxt , &operator_setTo, kernelName, globalThreads,
localThreads, args, -1, -1, compile_option); localThreads, args, -1, -1, buildOptions.c_str());
} }
} }
@ -646,161 +508,16 @@ static void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const o
{ {
CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols); CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols);
vector<pair<size_t , const void *> > args; vector<pair<size_t , const void *> > args;
size_t localThreads[3] = {16, 16, 1}; size_t localThreads[3] = { 16, 16, 1 };
size_t globalThreads[3]; size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1];
globalThreads[2] = 1;
int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize(); int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize();
char compile_option[32];
union sc const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
{ const char channelMap[] = { ' ', ' ', '2', '4', '4' };
cl_uchar4 uval; std::string buildOptions = format("-D GENTYPE=%s%c", typeMap[dst.depth()], channelMap[dst.channels()]);
cl_char4 cval;
cl_ushort4 usval; oclMat m(Mat(1, 1, dst.type(), scalar));
cl_short4 shval; args.push_back( make_pair( sizeof(cl_mem) , (void *)&m.data ));
cl_int4 ival;
cl_float4 fval;
cl_double4 dval;
} val;
switch(dst.depth())
{
case CV_8U:
val.uval.s[0] = saturate_cast<uchar>(scalar.val[0]);
val.uval.s[1] = saturate_cast<uchar>(scalar.val[1]);
val.uval.s[2] = saturate_cast<uchar>(scalar.val[2]);
val.uval.s[3] = saturate_cast<uchar>(scalar.val[3]);
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=uchar");
args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] ));
break;
case 4:
sprintf(compile_option, "-D GENTYPE=uchar4");
args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval ));
break;
default:
CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
}
break;
case CV_8S:
val.cval.s[0] = saturate_cast<char>(scalar.val[0]);
val.cval.s[1] = saturate_cast<char>(scalar.val[1]);
val.cval.s[2] = saturate_cast<char>(scalar.val[2]);
val.cval.s[3] = saturate_cast<char>(scalar.val[3]);
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=char");
args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] ));
break;
case 4:
sprintf(compile_option, "-D GENTYPE=char4");
args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval ));
break;
default:
CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
}
break;
case CV_16U:
val.usval.s[0] = saturate_cast<ushort>(scalar.val[0]);
val.usval.s[1] = saturate_cast<ushort>(scalar.val[1]);
val.usval.s[2] = saturate_cast<ushort>(scalar.val[2]);
val.usval.s[3] = saturate_cast<ushort>(scalar.val[3]);
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=ushort");
args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] ));
break;
case 4:
sprintf(compile_option, "-D GENTYPE=ushort4");
args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval ));
break;
default:
CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
}
break;
case CV_16S:
val.shval.s[0] = saturate_cast<short>(scalar.val[0]);
val.shval.s[1] = saturate_cast<short>(scalar.val[1]);
val.shval.s[2] = saturate_cast<short>(scalar.val[2]);
val.shval.s[3] = saturate_cast<short>(scalar.val[3]);
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=short");
args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] ));
break;
case 4:
sprintf(compile_option, "-D GENTYPE=short4");
args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval ));
break;
default:
CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
}
break;
case CV_32S:
val.ival.s[0] = saturate_cast<int>(scalar.val[0]);
val.ival.s[1] = saturate_cast<int>(scalar.val[1]);
val.ival.s[2] = saturate_cast<int>(scalar.val[2]);
val.ival.s[3] = saturate_cast<int>(scalar.val[3]);
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=int");
args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] ));
break;
case 4:
sprintf(compile_option, "-D GENTYPE=int4");
args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival ));
break;
default:
CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
}
break;
case CV_32F:
val.fval.s[0] = scalar.val[0];
val.fval.s[1] = scalar.val[1];
val.fval.s[2] = scalar.val[2];
val.fval.s[3] = scalar.val[3];
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=float");
args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] ));
break;
case 4:
sprintf(compile_option, "-D GENTYPE=float4");
args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval ));
break;
default:
CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
}
break;
case CV_64F:
val.dval.s[0] = scalar.val[0];
val.dval.s[1] = scalar.val[1];
val.dval.s[2] = scalar.val[2];
val.dval.s[3] = scalar.val[3];
switch(dst.oclchannels())
{
case 1:
sprintf(compile_option, "-D GENTYPE=double");
args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] ));
break;
case 4:
sprintf(compile_option, "-D GENTYPE=double4");
args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval ));
break;
default:
CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
}
break;
default:
CV_Error(CV_StsUnsupportedFormat, "unknown depth");
}
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data )); args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows ));
@ -810,38 +527,21 @@ static void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const o
args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step )); args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset )); args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset ));
openCLExecuteKernel(dst.clCxt , &operator_setToM, kernelName, globalThreads, openCLExecuteKernel(dst.clCxt , &operator_setToM, kernelName, globalThreads,
localThreads, args, -1, -1, compile_option); localThreads, args, -1, -1, buildOptions.c_str());
} }
oclMat &cv::ocl::oclMat::setTo(const Scalar &scalar, const oclMat &mask) oclMat &cv::ocl::oclMat::setTo(const Scalar &scalar, const oclMat &mask)
{ {
//cout << "cv::ocl::oclMat::setTo()" << endl;
CV_Assert(mask.type() == CV_8UC1); CV_Assert(mask.type() == CV_8UC1);
CV_Assert( this->depth() >= 0 && this->depth() <= 6 ); CV_Assert( this->depth() >= 0 && this->depth() <= 6 );
CV_DbgAssert( !this->empty()); CV_DbgAssert( !this->empty());
//cl_int status;
//cl_mem mem;
//mem = clCreateBuffer(this->clCxt->clContext,CL_MEM_READ_WRITE,
// sizeof(double)*4,NULL,&status);
//openCLVerifyCall(status);
//double* s = (double *)scalar.val;
//openCLSafeCall(clEnqueueWriteBuffer(this->clCxt->clCmdQueue,
// (cl_mem)mem,1,0,sizeof(double)*4,s,0,0,0));
if (mask.empty()) if (mask.empty())
{ {
if(type() == CV_8UC1) set_to_withoutmask_run(*this, scalar, type() == CV_8UC1 ?
{ "set_to_without_mask_C1_D0" : "set_to_without_mask");
set_to_withoutmask_run(*this, scalar, "set_to_without_mask_C1_D0");
}
else
{
set_to_withoutmask_run(*this, scalar, "set_to_without_mask");
}
} }
else else
{
set_to_withmask_run(*this, scalar, mask, "set_to_with_mask"); set_to_withmask_run(*this, scalar, mask, "set_to_with_mask");
}
return *this; return *this;
} }
@ -849,91 +549,43 @@ oclMat &cv::ocl::oclMat::setTo(const Scalar &scalar, const oclMat &mask)
oclMat cv::ocl::oclMat::reshape(int new_cn, int new_rows) const oclMat cv::ocl::oclMat::reshape(int new_cn, int new_rows) const
{ {
if( new_rows != 0 && new_rows != rows) if( new_rows != 0 && new_rows != rows)
CV_Error( CV_StsBadFunc, "oclMat's number of rows can not be changed for current version" );
{
CV_Error( CV_StsBadFunc,
"oclMat's number of rows can not be changed for current version" );
}
oclMat hdr = *this; oclMat hdr = *this;
int cn = oclchannels(); int cn = oclchannels();
if (new_cn == 0) if (new_cn == 0)
new_cn = cn; new_cn = cn;
int total_width = cols * cn; int total_width = cols * cn;
if ((new_cn > total_width || total_width % new_cn != 0) && new_rows == 0) if ((new_cn > total_width || total_width % new_cn != 0) && new_rows == 0)
new_rows = rows * total_width / new_cn; new_rows = rows * total_width / new_cn;
if (new_rows != 0 && new_rows != rows) if (new_rows != 0 && new_rows != rows)
{ {
int total_size = total_width * rows; int total_size = total_width * rows;
if (!isContinuous()) if (!isContinuous())
CV_Error(CV_BadStep, "The matrix is not continuous, thus its number of rows can not be changed"); CV_Error(CV_BadStep, "The matrix is not continuous, thus its number of rows can not be changed");
if ((unsigned)new_rows > (unsigned)total_size) if ((unsigned)new_rows > (unsigned)total_size)
CV_Error(CV_StsOutOfRange, "Bad new number of rows"); CV_Error(CV_StsOutOfRange, "Bad new number of rows");
total_width = total_size / new_rows; total_width = total_size / new_rows;
if (total_width * new_rows != total_size) if (total_width * new_rows != total_size)
CV_Error(CV_StsBadArg, "The total number of matrix elements is not divisible by the new number of rows"); CV_Error(CV_StsBadArg, "The total number of matrix elements is not divisible by the new number of rows");
hdr.rows = new_rows; hdr.rows = new_rows;
hdr.step = total_width * elemSize1(); hdr.step = total_width * elemSize1();
} }
int new_width = total_width / new_cn; int new_width = total_width / new_cn;
if (new_width * new_cn != total_width) if (new_width * new_cn != total_width)
CV_Error(CV_BadNumChannels, "The total width is not divisible by the new number of channels"); CV_Error(CV_BadNumChannels, "The total width is not divisible by the new number of channels");
hdr.cols = new_width; hdr.cols = new_width;
hdr.wholecols = new_width; hdr.wholecols = new_width;
hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn - 1) << CV_CN_SHIFT); hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn - 1) << CV_CN_SHIFT);
return hdr; return hdr;
} }
@ -953,11 +605,6 @@ void cv::ocl::oclMat::createEx(int _rows, int _cols, int _type, DevMemRW rw_type
clCxt = Context::getContext(); clCxt = Context::getContext();
/* core logic */ /* core logic */
_type &= TYPE_MASK; _type &= TYPE_MASK;
//download_channels = CV_MAT_CN(_type);
//if(download_channels==3)
//{
// _type = CV_MAKE_TYPE((CV_MAT_DEPTH(_type)),4);
//}
if( rows == _rows && cols == _cols && type() == _type && data ) if( rows == _rows && cols == _cols && type() == _type && data )
return; return;
if( data ) if( data )
@ -974,7 +621,6 @@ void cv::ocl::oclMat::createEx(int _rows, int _cols, int _type, DevMemRW rw_type
void *dev_ptr; void *dev_ptr;
openCLMallocPitchEx(clCxt, &dev_ptr, &step, GPU_MATRIX_MALLOC_STEP(esz * cols), rows, rw_type, mem_type); openCLMallocPitchEx(clCxt, &dev_ptr, &step, GPU_MATRIX_MALLOC_STEP(esz * cols), rows, rw_type, mem_type);
//openCLMallocPitch(clCxt,&dev_ptr, &step, esz * cols, rows);
if (esz * cols == step) if (esz * cols == step)
flags |= Mat::CONTINUOUS_FLAG; flags |= Mat::CONTINUOUS_FLAG;
@ -992,7 +638,6 @@ void cv::ocl::oclMat::createEx(int _rows, int _cols, int _type, DevMemRW rw_type
void cv::ocl::oclMat::release() void cv::ocl::oclMat::release()
{ {
//cout << "cv::ocl::oclMat::release()" << endl;
if( refcount && CV_XADD(refcount, -1) == 1 ) if( refcount && CV_XADD(refcount, -1) == 1 )
{ {
fastFree(refcount); fastFree(refcount);

View File

@ -34,6 +34,14 @@
// //
// //
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
__kernel void copy_to_with_mask( __kernel void copy_to_with_mask(
__global const GENTYPE* restrict srcMat, __global const GENTYPE* restrict srcMat,
__global GENTYPE* dstMat, __global GENTYPE* dstMat,
@ -47,16 +55,17 @@ __kernel void copy_to_with_mask(
int maskStep, int maskStep,
int maskoffset) int maskoffset)
{ {
int x=get_global_id(0); int x=get_global_id(0);
int y=get_global_id(1); int y=get_global_id(1);
x = x< cols ? x: cols-1;
y = y< rows ? y: rows-1; if (x < cols && y < rows)
int srcidx = mad24(y,srcStep_in_pixel,x+ srcoffset_in_pixel); {
int dstidx = mad24(y,dstStep_in_pixel,x+ dstoffset_in_pixel);
int maskidx = mad24(y,maskStep,x+ maskoffset); int maskidx = mad24(y,maskStep,x+ maskoffset);
uchar mask = maskMat[maskidx]; if ( maskMat[maskidx])
if (mask)
{ {
int srcidx = mad24(y,srcStep_in_pixel,x+ srcoffset_in_pixel);
int dstidx = mad24(y,dstStep_in_pixel,x+ dstoffset_in_pixel);
dstMat[dstidx] = srcMat[srcidx]; dstMat[dstidx] = srcMat[srcidx];
} }
}
} }

View File

@ -34,17 +34,22 @@
// //
// //
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
__kernel void set_to_without_mask_C1_D0(uchar scalar,__global uchar * dstMat, __kernel void set_to_without_mask_C1_D0(__global uchar * scalar,__global uchar * dstMat,
int cols,int rows,int dstStep_in_pixel,int offset_in_pixel) int cols,int rows,int dstStep_in_pixel,int offset_in_pixel)
{ {
int x=get_global_id(0)<<2; int x=get_global_id(0)<<2;
int y=get_global_id(1); int y=get_global_id(1);
//int addr_start = mad24(y,dstStep_in_pixel,offset_in_pixel);
//int addr_end = mad24(y,dstStep_in_pixel,cols+offset_in_pixel);
int idx = mad24(y,dstStep_in_pixel,x+ offset_in_pixel); int idx = mad24(y,dstStep_in_pixel,x+ offset_in_pixel);
uchar4 out; uchar4 out;
out.x = out.y = out.z = out.w = scalar; out.x = out.y = out.z = out.w = scalar[0];
if ( (x+3 < cols) && (y < rows)&& ((offset_in_pixel&3) == 0)) if ( (x+3 < cols) && (y < rows)&& ((offset_in_pixel&3) == 0))
{ {
@ -77,14 +82,14 @@ __kernel void set_to_without_mask_C1_D0(uchar scalar,__global uchar * dstMat,
} }
} }
__kernel void set_to_without_mask(GENTYPE scalar,__global GENTYPE * dstMat, __kernel void set_to_without_mask(__global GENTYPE * scalar,__global GENTYPE * dstMat,
int cols,int rows,int dstStep_in_pixel,int offset_in_pixel) int cols, int rows, int dstStep_in_pixel, int offset_in_pixel)
{ {
int x=get_global_id(0); int x = get_global_id(0);
int y=get_global_id(1); int y = get_global_id(1);
if ( (x < cols) & (y < rows)) if ( (x < cols) & (y < rows))
{ {
int idx = mad24(y,dstStep_in_pixel,x+ offset_in_pixel); int idx = mad24(y, dstStep_in_pixel, x + offset_in_pixel);
dstMat[idx] = scalar; dstMat[idx] = scalar[0];
} }
} }

View File

@ -33,8 +33,17 @@
// the use of this software, even if advised of the possibility of such damage. // the use of this software, even if advised of the possibility of such damage.
// //
// //
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
__kernel void set_to_with_mask( __kernel void set_to_with_mask(
GENTYPE scalar, __global GENTYPE * scalar,
__global GENTYPE * dstMat, __global GENTYPE * dstMat,
int cols, int cols,
int rows, int rows,
@ -44,16 +53,16 @@ __kernel void set_to_with_mask(
int maskStep, int maskStep,
int maskoffset) int maskoffset)
{ {
int x=get_global_id(0); int x = get_global_id(0);
int y=get_global_id(1); int y = get_global_id(1);
x = x< cols ? x: cols-1;
y = y< rows ? y: rows-1;
int dstidx = mad24(y,dstStep_in_pixel,x+ dstoffset_in_pixel);
int maskidx = mad24(y,maskStep,x+ maskoffset);
uchar mask = maskMat[maskidx];
if (mask)
{
dstMat[dstidx] = scalar;
}
if (x < cols && y < rows)
{
int maskidx = mad24(y,maskStep,x+ maskoffset);
if (maskMat[maskidx])
{
int dstidx = mad24(y,dstStep_in_pixel,x+ dstoffset_in_pixel);
dstMat[dstidx] = scalar[0];
}
}
} }

View File

@ -77,7 +77,7 @@ PARAM_TEST_CASE(ConvertToTestBase, MatType, MatType, int, bool)
cv::ocl::oclMat gdst_whole; cv::ocl::oclMat gdst_whole;
// ocl mat with roi // ocl mat with roi
cv::ocl::oclMat gmat; cv::ocl::oclMat gsrc;
cv::ocl::oclMat gdst; cv::ocl::oclMat gdst;
virtual void SetUp() virtual void SetUp()
@ -123,7 +123,7 @@ PARAM_TEST_CASE(ConvertToTestBase, MatType, MatType, int, bool)
gdst_whole = dst; gdst_whole = dst;
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
gmat = mat_roi; gsrc = mat_roi;
} }
}; };
@ -136,7 +136,7 @@ TEST_P(ConvertTo, Accuracy)
random_roi(); random_roi();
mat_roi.convertTo(dst_roi, dst_type); mat_roi.convertTo(dst_roi, dst_type);
gmat.convertTo(gdst, dst_type); gsrc.convertTo(gdst, dst_type);
EXPECT_MAT_NEAR(dst, Mat(gdst_whole), src_depth == CV_64F ? 1.0 : 0.0); EXPECT_MAT_NEAR(dst, Mat(gdst_whole), src_depth == CV_64F ? 1.0 : 0.0);
EXPECT_MAT_NEAR(dst_roi, Mat(gdst), src_depth == CV_64F ? 1.0 : 0.0); EXPECT_MAT_NEAR(dst_roi, Mat(gdst), src_depth == CV_64F ? 1.0 : 0.0);
@ -145,27 +145,20 @@ TEST_P(ConvertTo, Accuracy)
///////////////////////////////////////////copyto///////////////////////////////////////////////////////////// ///////////////////////////////////////////copyto/////////////////////////////////////////////////////////////
PARAM_TEST_CASE(CopyToTestBase, MatType, bool) PARAM_TEST_CASE(CopyToTestBase, MatType, int, bool)
{ {
int type;
bool use_roi; bool use_roi;
cv::Mat mat; cv::Mat src, mask, dst;
cv::Mat mask;
cv::Mat dst;
// set up roi // set up roi
int roicols; int roicols,roirows;
int roirows; int srcx, srcy;
int srcx; int dstx, dsty;
int srcy; int maskx,masky;
int dstx;
int dsty;
int maskx;
int masky;
// src mat with roi // src mat with roi
cv::Mat mat_roi; cv::Mat src_roi;
cv::Mat mask_roi; cv::Mat mask_roi;
cv::Mat dst_roi; cv::Mat dst_roi;
@ -173,21 +166,18 @@ PARAM_TEST_CASE(CopyToTestBase, MatType, bool)
cv::ocl::oclMat gdst_whole; cv::ocl::oclMat gdst_whole;
// ocl mat with roi // ocl mat with roi
cv::ocl::oclMat gmat; cv::ocl::oclMat gsrc, gdst, gmask;
cv::ocl::oclMat gdst;
cv::ocl::oclMat gmask;
virtual void SetUp() virtual void SetUp()
{ {
type = GET_PARAM(0); int type = CV_MAKETYPE(GET_PARAM(0), GET_PARAM(1));
use_roi = GET_PARAM(1); use_roi = GET_PARAM(2);
cv::RNG &rng = TS::ptr()->get_rng(); cv::RNG &rng = TS::ptr()->get_rng();
cv::Size size(MWIDTH, MHEIGHT);
mat = randomMat(rng, size, type, 5, 16, false); src = randomMat(rng, randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false);
dst = randomMat(rng, size, type, 5, 16, false); dst = randomMat(rng, use_roi ? randomSize(MIN_VALUE, MAX_VALUE) : src.size(), type, 5, 16, false);
mask = randomMat(rng, size, CV_8UC1, 0, 2, false); mask = randomMat(rng, use_roi ? randomSize(MIN_VALUE, MAX_VALUE) : src.size(), CV_8UC1, 0, 2, false);
cv::threshold(mask, mask, 0.5, 255., CV_8UC1); cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
} }
@ -198,32 +188,32 @@ PARAM_TEST_CASE(CopyToTestBase, MatType, bool)
{ {
// randomize ROI // randomize ROI
cv::RNG &rng = TS::ptr()->get_rng(); cv::RNG &rng = TS::ptr()->get_rng();
roicols = rng.uniform(1, mat.cols); roicols = rng.uniform(1, MIN_VALUE);
roirows = rng.uniform(1, mat.rows); roirows = rng.uniform(1, MIN_VALUE);
srcx = rng.uniform(0, mat.cols - roicols); srcx = rng.uniform(0, src.cols - roicols);
srcy = rng.uniform(0, mat.rows - roirows); srcy = rng.uniform(0, src.rows - roirows);
dstx = rng.uniform(0, dst.cols - roicols); dstx = rng.uniform(0, dst.cols - roicols);
dsty = rng.uniform(0, dst.rows - roirows); dsty = rng.uniform(0, dst.rows - roirows);
maskx = rng.uniform(0, mask.cols - roicols); maskx = rng.uniform(0, mask.cols - roicols);
masky = rng.uniform(0, mask.rows - roirows); masky = rng.uniform(0, mask.rows - roirows);
} }
else else
{ {
roicols = mat.cols; roicols = src.cols;
roirows = mat.rows; roirows = src.rows;
srcx = srcy = 0; srcx = srcy = 0;
dstx = dsty = 0; dstx = dsty = 0;
maskx = masky = 0; maskx = masky = 0;
} }
mat_roi = mat(Rect(srcx, srcy, roicols, roirows)); src_roi = src(Rect(srcx, srcy, roicols, roirows));
mask_roi = mask(Rect(maskx, masky, roicols, roirows)); mask_roi = mask(Rect(maskx, masky, roicols, roirows));
dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
gdst_whole = dst; gdst_whole = dst;
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
gmat = mat_roi; gsrc = src_roi;
gmask = mask_roi; gmask = mask_roi;
} }
}; };
@ -236,8 +226,8 @@ TEST_P(CopyTo, Without_mask)
{ {
random_roi(); random_roi();
mat_roi.copyTo(dst_roi); src_roi.copyTo(dst_roi);
gmat.copyTo(gdst); gsrc.copyTo(gdst);
EXPECT_MAT_NEAR(dst, Mat(gdst_whole), 0.0); EXPECT_MAT_NEAR(dst, Mat(gdst_whole), 0.0);
} }
@ -249,8 +239,8 @@ TEST_P(CopyTo, With_mask)
{ {
random_roi(); random_roi();
mat_roi.copyTo(dst_roi, mask_roi); src_roi.copyTo(dst_roi, mask_roi);
gmat.copyTo(gdst, gmask); gsrc.copyTo(gdst, gmask);
EXPECT_MAT_NEAR(dst, Mat(gdst_whole), 0.0); EXPECT_MAT_NEAR(dst, Mat(gdst_whole), 0.0);
} }
@ -258,48 +248,47 @@ TEST_P(CopyTo, With_mask)
/////////////////////////////////////////// setTo ///////////////////////////////////////////////////////////// /////////////////////////////////////////// setTo /////////////////////////////////////////////////////////////
PARAM_TEST_CASE(SetToTestBase, MatType, bool) PARAM_TEST_CASE(SetToTestBase, MatType, int, bool)
{ {
int type; int depth, channels;
bool use_roi; bool use_roi;
cv::Scalar val; cv::Scalar val;
cv::Mat mat; cv::Mat src;
cv::Mat mask; cv::Mat mask;
// set up roi // set up roi
int roicols; int roicols, roirows;
int roirows; int srcx, srcy;
int srcx; int maskx, masky;
int srcy;
int maskx;
int masky;
// src mat with roi // src mat with roi
cv::Mat mat_roi; cv::Mat src_roi;
cv::Mat mask_roi; cv::Mat mask_roi;
// ocl dst mat for testing // ocl dst mat for testing
cv::ocl::oclMat gmat_whole; cv::ocl::oclMat gsrc_whole;
// ocl mat with roi // ocl mat with roi
cv::ocl::oclMat gmat; cv::ocl::oclMat gsrc;
cv::ocl::oclMat gmask; cv::ocl::oclMat gmask;
virtual void SetUp() virtual void SetUp()
{ {
type = GET_PARAM(0); depth = GET_PARAM(0);
use_roi = GET_PARAM(1); channels = GET_PARAM(1);
use_roi = GET_PARAM(2);
cv::RNG &rng = TS::ptr()->get_rng(); cv::RNG &rng = TS::ptr()->get_rng();
cv::Size size(MWIDTH, MHEIGHT); int type = CV_MAKE_TYPE(depth, channels);
mat = randomMat(rng, size, type, 5, 16, false); src = randomMat(rng, randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false);
mask = randomMat(rng, size, CV_8UC1, 0, 2, false); mask = randomMat(rng, use_roi ? randomSize(MIN_VALUE, MAX_VALUE) : src.size(), CV_8UC1, 0, 2, false);
cv::threshold(mask, mask, 0.5, 255., CV_8UC1); cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0),
rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
} }
void random_roi() void random_roi()
@ -308,26 +297,26 @@ PARAM_TEST_CASE(SetToTestBase, MatType, bool)
{ {
// randomize ROI // randomize ROI
cv::RNG &rng = TS::ptr()->get_rng(); cv::RNG &rng = TS::ptr()->get_rng();
roicols = rng.uniform(1, mat.cols); roicols = rng.uniform(1, MIN_VALUE);
roirows = rng.uniform(1, mat.rows); roirows = rng.uniform(1, MIN_VALUE);
srcx = rng.uniform(0, mat.cols - roicols); srcx = rng.uniform(0, src.cols - roicols);
srcy = rng.uniform(0, mat.rows - roirows); srcy = rng.uniform(0, src.rows - roirows);
maskx = rng.uniform(0, mask.cols - roicols); maskx = rng.uniform(0, mask.cols - roicols);
masky = rng.uniform(0, mask.rows - roirows); masky = rng.uniform(0, mask.rows - roirows);
} }
else else
{ {
roicols = mat.cols; roicols = src.cols;
roirows = mat.rows; roirows = src.rows;
srcx = srcy = 0; srcx = srcy = 0;
maskx = masky = 0; maskx = masky = 0;
} }
mat_roi = mat(Rect(srcx, srcy, roicols, roirows)); src_roi = src(Rect(srcx, srcy, roicols, roirows));
mask_roi = mask(Rect(maskx, masky, roicols, roirows)); mask_roi = mask(Rect(maskx, masky, roicols, roirows));
gmat_whole = mat; gsrc_whole = src;
gmat = gmat_whole(Rect(srcx, srcy, roicols, roirows)); gsrc = gsrc_whole(Rect(srcx, srcy, roicols, roirows));
gmask = mask_roi; gmask = mask_roi;
} }
@ -341,10 +330,10 @@ TEST_P(SetTo, Without_mask)
{ {
random_roi(); random_roi();
mat_roi.setTo(val); src_roi.setTo(val);
gmat.setTo(val); gsrc.setTo(val);
EXPECT_MAT_NEAR(mat, Mat(gmat_whole), 1.); EXPECT_MAT_NEAR(src, Mat(gsrc_whole), 1.);
} }
} }
@ -354,10 +343,10 @@ TEST_P(SetTo, With_mask)
{ {
random_roi(); random_roi();
mat_roi.setTo(val, mask_roi); src_roi.setTo(val, mask_roi);
gmat.setTo(val, gmask); gsrc.setTo(val, gmask);
EXPECT_MAT_NEAR(mat, Mat(gmat_whole), 1.); EXPECT_MAT_NEAR(src, Mat(gsrc_whole), 1.);
} }
} }
@ -431,12 +420,12 @@ INSTANTIATE_TEST_CASE_P(MatrixOperation, ConvertTo, Combine(
Range(1, 5), Bool())); Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(MatrixOperation, CopyTo, Combine( INSTANTIATE_TEST_CASE_P(MatrixOperation, CopyTo, Combine(
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4), Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F),
Bool())); testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(MatrixOperation, SetTo, Combine( INSTANTIATE_TEST_CASE_P(MatrixOperation, SetTo, Combine(
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4), Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F),
Bool())); testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(MatrixOperation, convertC3C4, Combine( INSTANTIATE_TEST_CASE_P(MatrixOperation, convertC3C4, Combine(
Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F),