From 362a67a6959f19ba27cfda7b3cb296c4d02e35ef Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Fri, 20 Sep 2013 14:22:18 +0400 Subject: [PATCH] fixed ocl::oclMat::setTo for 2-channel images --- modules/ocl/src/matrix_operations.cpp | 443 ++------------------- modules/ocl/src/opencl/operator_copyToM.cl | 25 +- modules/ocl/src/opencl/operator_setTo.cl | 25 +- modules/ocl/src/opencl/operator_setToM.cl | 33 +- modules/ocl/test/test_matrix_operation.cpp | 143 +++---- 5 files changed, 163 insertions(+), 506 deletions(-) diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp index 61b6df896..ff52b8a55 100644 --- a/modules/ocl/src/matrix_operations.cpp +++ b/modules/ocl/src/matrix_operations.cpp @@ -445,200 +445,62 @@ void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double be /////////////////////////////////////////////////////////////////////////// //////////////////////////////// setTo //////////////////////////////////// /////////////////////////////////////////////////////////////////////////// + oclMat &cv::ocl::oclMat::operator = (const Scalar &s) { setTo(s); return *this; } + static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, string kernelName) { vector > args; size_t localThreads[3] = {16, 16, 1}; - size_t globalThreads[3]; - globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0]; - globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1]; - globalThreads[2] = 1; + size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize(); - if(dst.type() == CV_8UC1) - { + + if (dst.type() == CV_8UC1) globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0]; - } - char compile_option[32]; - union sc - { - cl_uchar4 uval; - cl_char4 cval; - cl_ushort4 usval; - cl_short4 shval; - cl_int4 ival; - cl_float4 fval; - cl_double4 dval; - } val; - switch(dst.depth()) - { - case CV_8U: - val.uval.s[0] = saturate_cast(scalar.val[0]); - val.uval.s[1] = saturate_cast(scalar.val[1]); - val.uval.s[2] = saturate_cast(scalar.val[2]); - val.uval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.oclchannels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=uchar"); - args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=uchar4"); - args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); - } - break; - case CV_8S: - val.cval.s[0] = saturate_cast(scalar.val[0]); - val.cval.s[1] = saturate_cast(scalar.val[1]); - val.cval.s[2] = saturate_cast(scalar.val[2]); - val.cval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.oclchannels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=char"); - args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=char4"); - args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); - } - break; - case CV_16U: - val.usval.s[0] = saturate_cast(scalar.val[0]); - val.usval.s[1] = saturate_cast(scalar.val[1]); - val.usval.s[2] = saturate_cast(scalar.val[2]); - val.usval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.oclchannels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=ushort"); - args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=ushort4"); - args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); - } - break; - case CV_16S: - val.shval.s[0] = saturate_cast(scalar.val[0]); - val.shval.s[1] = saturate_cast(scalar.val[1]); - val.shval.s[2] = saturate_cast(scalar.val[2]); - val.shval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.oclchannels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=short"); - args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=short4"); - args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); - } - break; - case CV_32S: - val.ival.s[0] = saturate_cast(scalar.val[0]); - val.ival.s[1] = saturate_cast(scalar.val[1]); - val.ival.s[2] = saturate_cast(scalar.val[2]); - val.ival.s[3] = saturate_cast(scalar.val[3]); - switch(dst.oclchannels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=int"); - args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] )); - break; - case 2: - sprintf(compile_option, "-D GENTYPE=int2"); - cl_int2 i2val; - i2val.s[0] = val.ival.s[0]; - i2val.s[1] = val.ival.s[1]; - args.push_back( make_pair( sizeof(cl_int2) , (void *)&i2val )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=int4"); - args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival )); - break; - default: - CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); - } - break; - case CV_32F: - val.fval.s[0] = scalar.val[0]; - val.fval.s[1] = scalar.val[1]; - val.fval.s[2] = scalar.val[2]; - val.fval.s[3] = scalar.val[3]; - switch(dst.oclchannels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=float"); - args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=float4"); - args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); - } - break; - case CV_64F: - val.dval.s[0] = scalar.val[0]; - val.dval.s[1] = scalar.val[1]; - val.dval.s[2] = scalar.val[2]; - val.dval.s[3] = scalar.val[3]; - switch(dst.oclchannels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=double"); - args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=double4"); - args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); - } - break; - default: - CV_Error(CV_StsUnsupportedFormat, "unknown depth"); - } + + const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; + const char channelMap[] = { ' ', ' ', '2', '4', '4' }; + std::string buildOptions = format("-D GENTYPE=%s%c", typeMap[dst.depth()], channelMap[dst.channels()]); + + Mat mat(1, 1, dst.type(), scalar); + #ifdef CL_VERSION_1_2 - //this enables backwards portability to - //run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support - if(Context::getContext()->supportsFeature(Context::CL_VER_1_2) && + // this enables backwards portability to + // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support + if (Context::getContext()->supportsFeature(Context::CL_VER_1_2) && dst.offset == 0 && dst.cols == dst.wholecols) { + const int sizeofMap[][7] = + { + { sizeof(cl_uchar) , sizeof(cl_char) , sizeof(cl_ushort) , sizeof(cl_short) , sizeof(cl_int) , sizeof(cl_float) , sizeof(cl_double) }, + { sizeof(cl_uchar2), sizeof(cl_char2), sizeof(cl_ushort2), sizeof(cl_short2), sizeof(cl_int2), sizeof(cl_float2), sizeof(cl_double2) }, + { 0 , 0 , 0 , 0 , 0 , 0 , 0 }, + { sizeof(cl_uchar4), sizeof(cl_char4), sizeof(cl_ushort4), sizeof(cl_short4), sizeof(cl_int4), sizeof(cl_float4), sizeof(cl_double4) }, + }; + int sizeofGeneric = sizeofMap[dst.oclchannels() - 1][dst.depth()]; + clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(), - (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL); + (cl_mem)dst.data, (void*)mat.data, sizeofGeneric, + 0, dst.step * dst.rows, 0, NULL, NULL); } else #endif { + oclMat m(mat); + args.push_back( make_pair( sizeof(cl_mem) , (void*)&m.data )); args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows )); args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel )); + openCLExecuteKernel(dst.clCxt , &operator_setTo, kernelName, globalThreads, - localThreads, args, -1, -1, compile_option); + localThreads, args, -1, -1, buildOptions.c_str()); } } @@ -646,161 +508,16 @@ static void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const o { CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols); vector > args; - size_t localThreads[3] = {16, 16, 1}; - size_t globalThreads[3]; - globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0]; - globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1]; - globalThreads[2] = 1; + size_t localThreads[3] = { 16, 16, 1 }; + size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize(); - char compile_option[32]; - union sc - { - cl_uchar4 uval; - cl_char4 cval; - cl_ushort4 usval; - cl_short4 shval; - cl_int4 ival; - cl_float4 fval; - cl_double4 dval; - } val; - switch(dst.depth()) - { - case CV_8U: - val.uval.s[0] = saturate_cast(scalar.val[0]); - val.uval.s[1] = saturate_cast(scalar.val[1]); - val.uval.s[2] = saturate_cast(scalar.val[2]); - val.uval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.oclchannels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=uchar"); - args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=uchar4"); - args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); - } - break; - case CV_8S: - val.cval.s[0] = saturate_cast(scalar.val[0]); - val.cval.s[1] = saturate_cast(scalar.val[1]); - val.cval.s[2] = saturate_cast(scalar.val[2]); - val.cval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.oclchannels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=char"); - args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=char4"); - args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); - } - break; - case CV_16U: - val.usval.s[0] = saturate_cast(scalar.val[0]); - val.usval.s[1] = saturate_cast(scalar.val[1]); - val.usval.s[2] = saturate_cast(scalar.val[2]); - val.usval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.oclchannels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=ushort"); - args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=ushort4"); - args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); - } - break; - case CV_16S: - val.shval.s[0] = saturate_cast(scalar.val[0]); - val.shval.s[1] = saturate_cast(scalar.val[1]); - val.shval.s[2] = saturate_cast(scalar.val[2]); - val.shval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.oclchannels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=short"); - args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=short4"); - args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); - } - break; - case CV_32S: - val.ival.s[0] = saturate_cast(scalar.val[0]); - val.ival.s[1] = saturate_cast(scalar.val[1]); - val.ival.s[2] = saturate_cast(scalar.val[2]); - val.ival.s[3] = saturate_cast(scalar.val[3]); - switch(dst.oclchannels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=int"); - args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=int4"); - args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival )); - break; - default: - CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); - } - break; - case CV_32F: - val.fval.s[0] = scalar.val[0]; - val.fval.s[1] = scalar.val[1]; - val.fval.s[2] = scalar.val[2]; - val.fval.s[3] = scalar.val[3]; - switch(dst.oclchannels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=float"); - args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=float4"); - args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); - } - break; - case CV_64F: - val.dval.s[0] = scalar.val[0]; - val.dval.s[1] = scalar.val[1]; - val.dval.s[2] = scalar.val[2]; - val.dval.s[3] = scalar.val[3]; - switch(dst.oclchannels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=double"); - args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=double4"); - args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); - } - break; - default: - CV_Error(CV_StsUnsupportedFormat, "unknown depth"); - } + + const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; + const char channelMap[] = { ' ', ' ', '2', '4', '4' }; + std::string buildOptions = format("-D GENTYPE=%s%c", typeMap[dst.depth()], channelMap[dst.channels()]); + + oclMat m(Mat(1, 1, dst.type(), scalar)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&m.data )); args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows )); @@ -810,38 +527,21 @@ static void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const o args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step )); args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset )); openCLExecuteKernel(dst.clCxt , &operator_setToM, kernelName, globalThreads, - localThreads, args, -1, -1, compile_option); + localThreads, args, -1, -1, buildOptions.c_str()); } oclMat &cv::ocl::oclMat::setTo(const Scalar &scalar, const oclMat &mask) { - //cout << "cv::ocl::oclMat::setTo()" << endl; CV_Assert(mask.type() == CV_8UC1); CV_Assert( this->depth() >= 0 && this->depth() <= 6 ); CV_DbgAssert( !this->empty()); - //cl_int status; - //cl_mem mem; - //mem = clCreateBuffer(this->clCxt->clContext,CL_MEM_READ_WRITE, - // sizeof(double)*4,NULL,&status); - //openCLVerifyCall(status); - //double* s = (double *)scalar.val; - //openCLSafeCall(clEnqueueWriteBuffer(this->clCxt->clCmdQueue, - // (cl_mem)mem,1,0,sizeof(double)*4,s,0,0,0)); if (mask.empty()) { - if(type() == CV_8UC1) - { - set_to_withoutmask_run(*this, scalar, "set_to_without_mask_C1_D0"); - } - else - { - set_to_withoutmask_run(*this, scalar, "set_to_without_mask"); - } + set_to_withoutmask_run(*this, scalar, type() == CV_8UC1 ? + "set_to_without_mask_C1_D0" : "set_to_without_mask"); } else - { set_to_withmask_run(*this, scalar, mask, "set_to_with_mask"); - } return *this; } @@ -849,91 +549,43 @@ oclMat &cv::ocl::oclMat::setTo(const Scalar &scalar, const oclMat &mask) oclMat cv::ocl::oclMat::reshape(int new_cn, int new_rows) const { if( new_rows != 0 && new_rows != rows) - - { - - CV_Error( CV_StsBadFunc, - - "oclMat's number of rows can not be changed for current version" ); - - } + CV_Error( CV_StsBadFunc, "oclMat's number of rows can not be changed for current version" ); oclMat hdr = *this; int cn = oclchannels(); - if (new_cn == 0) - new_cn = cn; - - int total_width = cols * cn; - - - if ((new_cn > total_width || total_width % new_cn != 0) && new_rows == 0) - new_rows = rows * total_width / new_cn; - - if (new_rows != 0 && new_rows != rows) - { - int total_size = total_width * rows; - - if (!isContinuous()) - CV_Error(CV_BadStep, "The matrix is not continuous, thus its number of rows can not be changed"); - - if ((unsigned)new_rows > (unsigned)total_size) - CV_Error(CV_StsOutOfRange, "Bad new number of rows"); - - total_width = total_size / new_rows; - - - if (total_width * new_rows != total_size) - CV_Error(CV_StsBadArg, "The total number of matrix elements is not divisible by the new number of rows"); - - hdr.rows = new_rows; - hdr.step = total_width * elemSize1(); - } - - int new_width = total_width / new_cn; - - - if (new_width * new_cn != total_width) - CV_Error(CV_BadNumChannels, "The total width is not divisible by the new number of channels"); - - hdr.cols = new_width; - hdr.wholecols = new_width; - hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn - 1) << CV_CN_SHIFT); - - - return hdr; } @@ -953,11 +605,6 @@ void cv::ocl::oclMat::createEx(int _rows, int _cols, int _type, DevMemRW rw_type clCxt = Context::getContext(); /* core logic */ _type &= TYPE_MASK; - //download_channels = CV_MAT_CN(_type); - //if(download_channels==3) - //{ - // _type = CV_MAKE_TYPE((CV_MAT_DEPTH(_type)),4); - //} if( rows == _rows && cols == _cols && type() == _type && data ) return; if( data ) @@ -974,7 +621,6 @@ void cv::ocl::oclMat::createEx(int _rows, int _cols, int _type, DevMemRW rw_type void *dev_ptr; openCLMallocPitchEx(clCxt, &dev_ptr, &step, GPU_MATRIX_MALLOC_STEP(esz * cols), rows, rw_type, mem_type); - //openCLMallocPitch(clCxt,&dev_ptr, &step, esz * cols, rows); if (esz * cols == step) flags |= Mat::CONTINUOUS_FLAG; @@ -992,7 +638,6 @@ void cv::ocl::oclMat::createEx(int _rows, int _cols, int _type, DevMemRW rw_type void cv::ocl::oclMat::release() { - //cout << "cv::ocl::oclMat::release()" << endl; if( refcount && CV_XADD(refcount, -1) == 1 ) { fastFree(refcount); diff --git a/modules/ocl/src/opencl/operator_copyToM.cl b/modules/ocl/src/opencl/operator_copyToM.cl index c49c6a323..69b5ea4ab 100644 --- a/modules/ocl/src/opencl/operator_copyToM.cl +++ b/modules/ocl/src/opencl/operator_copyToM.cl @@ -34,6 +34,14 @@ // // +#if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif +#endif + __kernel void copy_to_with_mask( __global const GENTYPE* restrict srcMat, __global GENTYPE* dstMat, @@ -47,16 +55,17 @@ __kernel void copy_to_with_mask( int maskStep, int maskoffset) { - int x=get_global_id(0); - int y=get_global_id(1); - x = x< cols ? x: cols-1; - y = y< rows ? y: rows-1; - int srcidx = mad24(y,srcStep_in_pixel,x+ srcoffset_in_pixel); - int dstidx = mad24(y,dstStep_in_pixel,x+ dstoffset_in_pixel); + int x=get_global_id(0); + int y=get_global_id(1); + + if (x < cols && y < rows) + { int maskidx = mad24(y,maskStep,x+ maskoffset); - uchar mask = maskMat[maskidx]; - if (mask) + if ( maskMat[maskidx]) { + int srcidx = mad24(y,srcStep_in_pixel,x+ srcoffset_in_pixel); + int dstidx = mad24(y,dstStep_in_pixel,x+ dstoffset_in_pixel); dstMat[dstidx] = srcMat[srcidx]; } + } } diff --git a/modules/ocl/src/opencl/operator_setTo.cl b/modules/ocl/src/opencl/operator_setTo.cl index 0075dc5b5..1d2ad6597 100644 --- a/modules/ocl/src/opencl/operator_setTo.cl +++ b/modules/ocl/src/opencl/operator_setTo.cl @@ -34,17 +34,22 @@ // // +#if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif +#endif -__kernel void set_to_without_mask_C1_D0(uchar scalar,__global uchar * dstMat, +__kernel void set_to_without_mask_C1_D0(__global uchar * scalar,__global uchar * dstMat, int cols,int rows,int dstStep_in_pixel,int offset_in_pixel) { int x=get_global_id(0)<<2; int y=get_global_id(1); - //int addr_start = mad24(y,dstStep_in_pixel,offset_in_pixel); - //int addr_end = mad24(y,dstStep_in_pixel,cols+offset_in_pixel); int idx = mad24(y,dstStep_in_pixel,x+ offset_in_pixel); uchar4 out; - out.x = out.y = out.z = out.w = scalar; + out.x = out.y = out.z = out.w = scalar[0]; if ( (x+3 < cols) && (y < rows)&& ((offset_in_pixel&3) == 0)) { @@ -77,14 +82,14 @@ __kernel void set_to_without_mask_C1_D0(uchar scalar,__global uchar * dstMat, } } -__kernel void set_to_without_mask(GENTYPE scalar,__global GENTYPE * dstMat, - int cols,int rows,int dstStep_in_pixel,int offset_in_pixel) +__kernel void set_to_without_mask(__global GENTYPE * scalar,__global GENTYPE * dstMat, + int cols, int rows, int dstStep_in_pixel, int offset_in_pixel) { - int x=get_global_id(0); - int y=get_global_id(1); + int x = get_global_id(0); + int y = get_global_id(1); if ( (x < cols) & (y < rows)) { - int idx = mad24(y,dstStep_in_pixel,x+ offset_in_pixel); - dstMat[idx] = scalar; + int idx = mad24(y, dstStep_in_pixel, x + offset_in_pixel); + dstMat[idx] = scalar[0]; } } diff --git a/modules/ocl/src/opencl/operator_setToM.cl b/modules/ocl/src/opencl/operator_setToM.cl index dde12d86f..a1cb092f8 100644 --- a/modules/ocl/src/opencl/operator_setToM.cl +++ b/modules/ocl/src/opencl/operator_setToM.cl @@ -33,8 +33,17 @@ // the use of this software, even if advised of the possibility of such damage. // // + +#if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif +#endif + __kernel void set_to_with_mask( - GENTYPE scalar, + __global GENTYPE * scalar, __global GENTYPE * dstMat, int cols, int rows, @@ -44,16 +53,16 @@ __kernel void set_to_with_mask( int maskStep, int maskoffset) { - int x=get_global_id(0); - int y=get_global_id(1); - x = x< cols ? x: cols-1; - y = y< rows ? y: rows-1; - int dstidx = mad24(y,dstStep_in_pixel,x+ dstoffset_in_pixel); - int maskidx = mad24(y,maskStep,x+ maskoffset); - uchar mask = maskMat[maskidx]; - if (mask) - { - dstMat[dstidx] = scalar; - } + int x = get_global_id(0); + int y = get_global_id(1); + if (x < cols && y < rows) + { + int maskidx = mad24(y,maskStep,x+ maskoffset); + if (maskMat[maskidx]) + { + int dstidx = mad24(y,dstStep_in_pixel,x+ dstoffset_in_pixel); + dstMat[dstidx] = scalar[0]; + } + } } diff --git a/modules/ocl/test/test_matrix_operation.cpp b/modules/ocl/test/test_matrix_operation.cpp index c5fcdce2c..b70ee6ccd 100644 --- a/modules/ocl/test/test_matrix_operation.cpp +++ b/modules/ocl/test/test_matrix_operation.cpp @@ -77,7 +77,7 @@ PARAM_TEST_CASE(ConvertToTestBase, MatType, MatType, int, bool) cv::ocl::oclMat gdst_whole; // ocl mat with roi - cv::ocl::oclMat gmat; + cv::ocl::oclMat gsrc; cv::ocl::oclMat gdst; virtual void SetUp() @@ -123,7 +123,7 @@ PARAM_TEST_CASE(ConvertToTestBase, MatType, MatType, int, bool) gdst_whole = dst; gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat = mat_roi; + gsrc = mat_roi; } }; @@ -136,7 +136,7 @@ TEST_P(ConvertTo, Accuracy) random_roi(); mat_roi.convertTo(dst_roi, dst_type); - gmat.convertTo(gdst, dst_type); + gsrc.convertTo(gdst, dst_type); EXPECT_MAT_NEAR(dst, Mat(gdst_whole), src_depth == CV_64F ? 1.0 : 0.0); EXPECT_MAT_NEAR(dst_roi, Mat(gdst), src_depth == CV_64F ? 1.0 : 0.0); @@ -145,27 +145,20 @@ TEST_P(ConvertTo, Accuracy) ///////////////////////////////////////////copyto///////////////////////////////////////////////////////////// -PARAM_TEST_CASE(CopyToTestBase, MatType, bool) +PARAM_TEST_CASE(CopyToTestBase, MatType, int, bool) { - int type; bool use_roi; - cv::Mat mat; - cv::Mat mask; - cv::Mat dst; + cv::Mat src, mask, dst; // set up roi - int roicols; - int roirows; - int srcx; - int srcy; - int dstx; - int dsty; - int maskx; - int masky; + int roicols,roirows; + int srcx, srcy; + int dstx, dsty; + int maskx,masky; // src mat with roi - cv::Mat mat_roi; + cv::Mat src_roi; cv::Mat mask_roi; cv::Mat dst_roi; @@ -173,21 +166,18 @@ PARAM_TEST_CASE(CopyToTestBase, MatType, bool) cv::ocl::oclMat gdst_whole; // ocl mat with roi - cv::ocl::oclMat gmat; - cv::ocl::oclMat gdst; - cv::ocl::oclMat gmask; + cv::ocl::oclMat gsrc, gdst, gmask; virtual void SetUp() { - type = GET_PARAM(0); - use_roi = GET_PARAM(1); + int type = CV_MAKETYPE(GET_PARAM(0), GET_PARAM(1)); + use_roi = GET_PARAM(2); cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); - mat = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + src = randomMat(rng, randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false); + dst = randomMat(rng, use_roi ? randomSize(MIN_VALUE, MAX_VALUE) : src.size(), type, 5, 16, false); + mask = randomMat(rng, use_roi ? randomSize(MIN_VALUE, MAX_VALUE) : src.size(), CV_8UC1, 0, 2, false); cv::threshold(mask, mask, 0.5, 255., CV_8UC1); } @@ -198,32 +188,32 @@ PARAM_TEST_CASE(CopyToTestBase, MatType, bool) { // randomize ROI cv::RNG &rng = TS::ptr()->get_rng(); - roicols = rng.uniform(1, mat.cols); - roirows = rng.uniform(1, mat.rows); - srcx = rng.uniform(0, mat.cols - roicols); - srcy = rng.uniform(0, mat.rows - roirows); - dstx = rng.uniform(0, dst.cols - roicols); - dsty = rng.uniform(0, dst.rows - roirows); + roicols = rng.uniform(1, MIN_VALUE); + roirows = rng.uniform(1, MIN_VALUE); + srcx = rng.uniform(0, src.cols - roicols); + srcy = rng.uniform(0, src.rows - roirows); + dstx = rng.uniform(0, dst.cols - roicols); + dsty = rng.uniform(0, dst.rows - roirows); maskx = rng.uniform(0, mask.cols - roicols); masky = rng.uniform(0, mask.rows - roirows); } else { - roicols = mat.cols; - roirows = mat.rows; + roicols = src.cols; + roirows = src.rows; srcx = srcy = 0; dstx = dsty = 0; maskx = masky = 0; } - mat_roi = mat(Rect(srcx, srcy, roicols, roirows)); + src_roi = src(Rect(srcx, srcy, roicols, roirows)); mask_roi = mask(Rect(maskx, masky, roicols, roirows)); dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); gdst_whole = dst; gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat = mat_roi; + gsrc = src_roi; gmask = mask_roi; } }; @@ -236,8 +226,8 @@ TEST_P(CopyTo, Without_mask) { random_roi(); - mat_roi.copyTo(dst_roi); - gmat.copyTo(gdst); + src_roi.copyTo(dst_roi); + gsrc.copyTo(gdst); EXPECT_MAT_NEAR(dst, Mat(gdst_whole), 0.0); } @@ -249,8 +239,8 @@ TEST_P(CopyTo, With_mask) { random_roi(); - mat_roi.copyTo(dst_roi, mask_roi); - gmat.copyTo(gdst, gmask); + src_roi.copyTo(dst_roi, mask_roi); + gsrc.copyTo(gdst, gmask); EXPECT_MAT_NEAR(dst, Mat(gdst_whole), 0.0); } @@ -258,48 +248,47 @@ TEST_P(CopyTo, With_mask) /////////////////////////////////////////// setTo ///////////////////////////////////////////////////////////// -PARAM_TEST_CASE(SetToTestBase, MatType, bool) +PARAM_TEST_CASE(SetToTestBase, MatType, int, bool) { - int type; + int depth, channels; bool use_roi; cv::Scalar val; - cv::Mat mat; + cv::Mat src; cv::Mat mask; // set up roi - int roicols; - int roirows; - int srcx; - int srcy; - int maskx; - int masky; + int roicols, roirows; + int srcx, srcy; + int maskx, masky; // src mat with roi - cv::Mat mat_roi; + cv::Mat src_roi; cv::Mat mask_roi; // ocl dst mat for testing - cv::ocl::oclMat gmat_whole; + cv::ocl::oclMat gsrc_whole; // ocl mat with roi - cv::ocl::oclMat gmat; + cv::ocl::oclMat gsrc; cv::ocl::oclMat gmask; virtual void SetUp() { - type = GET_PARAM(0); - use_roi = GET_PARAM(1); + depth = GET_PARAM(0); + channels = GET_PARAM(1); + use_roi = GET_PARAM(2); cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); + int type = CV_MAKE_TYPE(depth, channels); - mat = randomMat(rng, size, type, 5, 16, false); - mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + src = randomMat(rng, randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false); + mask = randomMat(rng, use_roi ? randomSize(MIN_VALUE, MAX_VALUE) : src.size(), CV_8UC1, 0, 2, false); cv::threshold(mask, mask, 0.5, 255., CV_8UC1); - val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); + val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), + rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); } void random_roi() @@ -308,26 +297,26 @@ PARAM_TEST_CASE(SetToTestBase, MatType, bool) { // randomize ROI cv::RNG &rng = TS::ptr()->get_rng(); - roicols = rng.uniform(1, mat.cols); - roirows = rng.uniform(1, mat.rows); - srcx = rng.uniform(0, mat.cols - roicols); - srcy = rng.uniform(0, mat.rows - roirows); + roicols = rng.uniform(1, MIN_VALUE); + roirows = rng.uniform(1, MIN_VALUE); + srcx = rng.uniform(0, src.cols - roicols); + srcy = rng.uniform(0, src.rows - roirows); maskx = rng.uniform(0, mask.cols - roicols); masky = rng.uniform(0, mask.rows - roirows); } else { - roicols = mat.cols; - roirows = mat.rows; + roicols = src.cols; + roirows = src.rows; srcx = srcy = 0; maskx = masky = 0; } - mat_roi = mat(Rect(srcx, srcy, roicols, roirows)); + src_roi = src(Rect(srcx, srcy, roicols, roirows)); mask_roi = mask(Rect(maskx, masky, roicols, roirows)); - gmat_whole = mat; - gmat = gmat_whole(Rect(srcx, srcy, roicols, roirows)); + gsrc_whole = src; + gsrc = gsrc_whole(Rect(srcx, srcy, roicols, roirows)); gmask = mask_roi; } @@ -341,10 +330,10 @@ TEST_P(SetTo, Without_mask) { random_roi(); - mat_roi.setTo(val); - gmat.setTo(val); + src_roi.setTo(val); + gsrc.setTo(val); - EXPECT_MAT_NEAR(mat, Mat(gmat_whole), 1.); + EXPECT_MAT_NEAR(src, Mat(gsrc_whole), 1.); } } @@ -354,10 +343,10 @@ TEST_P(SetTo, With_mask) { random_roi(); - mat_roi.setTo(val, mask_roi); - gmat.setTo(val, gmask); + src_roi.setTo(val, mask_roi); + gsrc.setTo(val, gmask); - EXPECT_MAT_NEAR(mat, Mat(gmat_whole), 1.); + EXPECT_MAT_NEAR(src, Mat(gsrc_whole), 1.); } } @@ -431,12 +420,12 @@ INSTANTIATE_TEST_CASE_P(MatrixOperation, ConvertTo, Combine( Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(MatrixOperation, CopyTo, Combine( - Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4), - Bool())); + Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), + testing::Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(MatrixOperation, SetTo, Combine( - Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4), - Bool())); + Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), + testing::Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(MatrixOperation, convertC3C4, Combine( Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F),