diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index 359d27222..449303cc3 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -915,11 +915,12 @@ void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t bl enum { OCL_OP_ADD=0, OCL_OP_SUB=1, OCL_OP_RSUB=2, OCL_OP_ABSDIFF=3, OCL_OP_MUL=4, OCL_OP_MUL_SCALE=5, OCL_OP_DIV_SCALE=6, OCL_OP_RECIP_SCALE=7, OCL_OP_ADDW=8, - OCL_OP_AND=9, OCL_OP_OR=10, OCL_OP_XOR=11, OCL_OP_NOT=12, OCL_OP_MIN=13, OCL_OP_MAX=14 }; + OCL_OP_AND=9, OCL_OP_OR=10, OCL_OP_XOR=11, OCL_OP_NOT=12, OCL_OP_MIN=13, OCL_OP_MAX=14, + OCL_OP_RDIV_SCALE=15 }; static const char* oclop2str[] = { "OP_ADD", "OP_SUB", "OP_RSUB", "OP_ABSDIFF", "OP_MUL", "OP_MUL_SCALE", "OP_DIV_SCALE", "OP_RECIP_SCALE", - "OP_ADDW", "OP_AND", "OP_OR", "OP_XOR", "OP_NOT", "OP_MIN", "OP_MAX", 0 }; + "OP_ADDW", "OP_AND", "OP_OR", "OP_XOR", "OP_NOT", "OP_MIN", "OP_MAX", "OP_RDIV_SCALE", 0 }; static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst, InputArray _mask, bool bitwise, int oclop, bool haveScalar ) @@ -1301,25 +1302,27 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, int kercn = haveMask || haveScalar ? cn : 1; - char cvtstr[3][32], opts[1024]; + char cvtstr[4][32], opts[1024]; sprintf(opts, "-D %s%s -D %s -D srcT1=%s -D srcT2=%s " - "-D dstT=%s -D workT=%s -D convertToWT1=%s " + "-D dstT=%s -D workT=%s -D scaleT=%s -D convertToWT1=%s " "-D convertToWT2=%s -D convertToDT=%s%s", (haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"), oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, kercn)), ocl::typeToStr(CV_MAKETYPE(depth2, kercn)), ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)), ocl::typeToStr(CV_MAKETYPE(wdepth, kercn)), + ocl::typeToStr(CV_MAKETYPE(wdepth, 1)), ocl::convertTypeStr(depth1, wdepth, kercn, cvtstr[0]), ocl::convertTypeStr(depth2, wdepth, kercn, cvtstr[1]), ocl::convertTypeStr(wdepth, ddepth, kercn, cvtstr[2]), doubleSupport ? " -D DOUBLE_SUPPORT" : ""); + size_t usrdata_esz = CV_ELEM_SIZE(wdepth); const uchar* usrdata_p = (const uchar*)usrdata; const double* usrdata_d = (const double*)usrdata; float usrdata_f[3]; int i, n = oclop == OCL_OP_MUL_SCALE || oclop == OCL_OP_DIV_SCALE || - oclop == OCL_OP_RECIP_SCALE ? 1 : oclop == OCL_OP_ADDW ? 3 : 0; + oclop == OCL_OP_RDIV_SCALE || oclop == OCL_OP_RECIP_SCALE ? 1 : oclop == OCL_OP_ADDW ? 3 : 0; if( n > 0 && wdepth == CV_32F ) { for( i = 0; i < n; i++ ) @@ -1352,13 +1355,20 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, buf, esz); if( !haveMask ) - k.args(src1arg, dstarg, scalararg); + { + if(n == 0) + k.args(src1arg, dstarg, scalararg); + else if(n == 1) + k.args(src1arg, dstarg, scalararg, + ocl::KernelArg(0, 0, 0, usrdata_p, usrdata_esz)); + else + CV_Error(Error::StsNotImplemented, "unsupported number of extra parameters"); + } else k.args(src1arg, maskarg, dstarg, scalararg); } else { - size_t usrdata_esz = CV_ELEM_SIZE(wdepth); src2 = _src2.getUMat(); ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2, cscale); @@ -1439,6 +1449,8 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, swapped12 = true; if( oclop == OCL_OP_SUB ) oclop = OCL_OP_RSUB; + if ( oclop == OCL_OP_DIV_SCALE ) + oclop = OCL_OP_RDIV_SCALE; } else if( !checkScalar(*psrc2, type1, kind2, kind1) ) CV_Error( CV_StsUnmatchedSizes, diff --git a/modules/core/src/opencl/arithm.cl b/modules/core/src/opencl/arithm.cl index 9c86057ca..1647e8d19 100644 --- a/modules/core/src/opencl/arithm.cl +++ b/modules/core/src/opencl/arithm.cl @@ -136,8 +136,12 @@ #elif defined OP_MUL_SCALE #undef EXTRA_PARAMS -#define EXTRA_PARAMS , workT scale -#define PROCESS_ELEM dstelem = convertToDT(srcelem1 * srcelem2 * scale) +#ifdef UNARY_OP +#define EXTRA_PARAMS , workT srcelem2, scaleT scale +#else +#define EXTRA_PARAMS , scaleT scale +#endif +#define PROCESS_ELEM dstelem = convertToDT(srcelem1 * scale * srcelem2) #elif defined OP_DIV #define PROCESS_ELEM \ @@ -146,21 +150,36 @@ #elif defined OP_DIV_SCALE #undef EXTRA_PARAMS -#define EXTRA_PARAMS , workT scale +#ifdef UNARY_OP +#define EXTRA_PARAMS , workT srcelem2, scaleT scale +#else +#define EXTRA_PARAMS , scaleT scale +#endif #define PROCESS_ELEM \ workT e2 = srcelem2, zero = (workT)(0); \ - dstelem = convertToDT(e2 != zero ? srcelem1 * scale / e2 : zero) + dstelem = convertToDT(e2 == zero ? zero : (srcelem1 * (workT)(scale) / e2)) + +#elif defined OP_RDIV_SCALE +#undef EXTRA_PARAMS +#ifdef UNARY_OP +#define EXTRA_PARAMS , workT srcelem2, scaleT scale +#else +#define EXTRA_PARAMS , scaleT scale +#endif +#define PROCESS_ELEM \ + workT e1 = srcelem1, zero = (workT)(0); \ + dstelem = convertToDT(e1 == zero ? zero : (srcelem2 * (workT)(scale) / e1)) #elif defined OP_RECIP_SCALE #undef EXTRA_PARAMS -#define EXTRA_PARAMS , workT scale +#define EXTRA_PARAMS , scaleT scale #define PROCESS_ELEM \ workT e1 = srcelem1, zero = (workT)(0); \ dstelem = convertToDT(e1 != zero ? scale / e1 : zero) #elif defined OP_ADDW #undef EXTRA_PARAMS -#define EXTRA_PARAMS , workT alpha, workT beta, workT gamma +#define EXTRA_PARAMS , scaleT alpha, scaleT beta, scaleT gamma #define PROCESS_ELEM dstelem = convertToDT(srcelem1*alpha + srcelem2*beta + gamma) #elif defined OP_MAG @@ -260,7 +279,8 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v) #undef srcelem2 #if defined OP_AND || defined OP_OR || defined OP_XOR || defined OP_ADD || defined OP_SAT_ADD || \ defined OP_SUB || defined OP_SAT_SUB || defined OP_RSUB || defined OP_SAT_RSUB || \ - defined OP_ABSDIFF || defined OP_CMP || defined OP_MIN || defined OP_MAX || defined OP_POW + defined OP_ABSDIFF || defined OP_CMP || defined OP_MIN || defined OP_MAX || defined OP_POW || \ + defined OP_MUL || defined OP_DIV #undef EXTRA_PARAMS #define EXTRA_PARAMS , workT srcelem2 #endif diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp index 7a24f317a..0c37c7116 100644 --- a/modules/core/test/ocl/test_arithm.cpp +++ b/modules/core/test/ocl/test_arithm.cpp @@ -293,7 +293,7 @@ OCL_TEST_P(Mul, Mat) } } -OCL_TEST_P(Mul, DISABLED_Scalar) +OCL_TEST_P(Mul, Scalar) { for (int j = 0; j < test_loop_times; j++) { @@ -306,7 +306,7 @@ OCL_TEST_P(Mul, DISABLED_Scalar) } } -OCL_TEST_P(Mul, DISABLED_Mat_Scale) +OCL_TEST_P(Mul, Mat_Scale) { for (int j = 0; j < test_loop_times; j++) { @@ -319,6 +319,20 @@ OCL_TEST_P(Mul, DISABLED_Mat_Scale) } } +OCL_TEST_P(Mul, Mat_Scalar_Scale) +{ + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(); + + OCL_OFF(cv::multiply(src1_roi, val, dst1_roi, val[0])); + OCL_ON(cv::multiply(usrc1_roi, val, udst1_roi, val[0])); + + Near(udst1_roi.depth() >= CV_32F ? 1e-2 : 1); + } +} + + //////////////////////////////// Div ///////////////////////////////////////////////// typedef ArithmTestBase Div; @@ -335,7 +349,7 @@ OCL_TEST_P(Div, Mat) } } -OCL_TEST_P(Div, DISABLED_Scalar) +OCL_TEST_P(Div, Scalar) { for (int j = 0; j < test_loop_times; j++) { @@ -348,6 +362,19 @@ OCL_TEST_P(Div, DISABLED_Scalar) } } +OCL_TEST_P(Div, Scalar2) +{ + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(); + + OCL_OFF(cv::divide(src1_roi, val, dst1_roi)); + OCL_ON(cv::divide(usrc1_roi, val, udst1_roi)); + + Near(udst1_roi.depth() >= CV_32F ? 1e-3 : 1); + } +} + OCL_TEST_P(Div, Mat_Scale) { for (int j = 0; j < test_loop_times; j++) @@ -361,8 +388,7 @@ OCL_TEST_P(Div, Mat_Scale) } } - -OCL_TEST_P(Div, DISABLED_Mat_Scalar_Scale) +OCL_TEST_P(Div, Mat_Scalar_Scale) { for (int j = 0; j < test_loop_times; j++) { @@ -375,6 +401,19 @@ OCL_TEST_P(Div, DISABLED_Mat_Scalar_Scale) } } +OCL_TEST_P(Div, Recip) +{ + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(); + + OCL_OFF(cv::divide(val[0], src1_roi, dst1_roi)); + OCL_ON(cv::divide(val[0], usrc1_roi, udst1_roi)); + + Near(udst1_roi.depth() >= CV_32F ? 1e-3 : 1); + } +} + //////////////////////////////// Min/Max ///////////////////////////////////////////////// typedef ArithmTestBase Min;