diff --git a/modules/core/src/matmul.cpp b/modules/core/src/matmul.cpp
index dc90ac447..3081676f5 100644
--- a/modules/core/src/matmul.cpp
+++ b/modules/core/src/matmul.cpp
@@ -41,6 +41,7 @@
 //M*/
 
 #include "precomp.hpp"
+#include "opencl_kernels.hpp"
 #include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
 
 #ifdef HAVE_IPP
@@ -2154,20 +2155,71 @@ static void scaleAdd_64f(const double* src1, const double* src2, double* dst,
 
 typedef void (*ScaleAddFunc)(const uchar* src1, const uchar* src2, uchar* dst, int len, const void* alpha);
 
+// OpenCL implementation of cv::scaleAdd: dst = src1 * alpha + src2, computed by
+// the "KF" kernel of arithm.cl built with OP_SCALE_ADD.
+// Returns false before _dst is touched when the default device has no double
+// support and the data is CV_64F, when the source sizes differ, or when the
+// kernel is empty after construction; the caller then continues with the
+// non-OpenCL code. Otherwise returns the result of Kernel::run().
+static bool ocl_scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray _dst, int type )
+{
+    // arithmetic is done in a work type whose depth is at least CV_32F
+    int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), wdepth = std::max(depth, CV_32F);
+    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
+    Size size = _src1.size();
+
+    if ( (!doubleSupport && depth == CV_64F) || size != _src2.size() )
+        return false;
+
+    char cvt[2][50];
+    ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
+                  format("-D OP_SCALE_ADD -D BINARY_OP -D dstT=%s -D workT=%s -D convertToWT1=%s"
+                         " -D srcT1=dstT -D srcT2=dstT -D convertToDT=%s%s", ocl::typeToStr(depth),
+                         ocl::typeToStr(wdepth), ocl::convertTypeStr(depth, wdepth, 1, cvt[0]),
+                         ocl::convertTypeStr(wdepth, depth, 1, cvt[1]),
+                         doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
+    if (k.empty())
+        return false;
+
+    _dst.create(size, type);
+    UMat src1 = _src1.getUMat(), src2 = _src2.getUMat(), dst = _dst.getUMat();
+
+    ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
+            src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
+            dstarg = ocl::KernelArg::WriteOnly(dst, cn);
+
+    // the kernel declares alpha as workT, so pass it with the matching width
+    if (wdepth == CV_32F)
+        k.args(src1arg, src2arg, dstarg, (float)alpha);
+    else
+        k.args(src1arg, src2arg, dstarg, alpha);
+
+    // explicit casts: int -> size_t inside a braced initializer is a narrowing conversion
+    size_t globalsize[2] = { (size_t)dst.cols * cn, (size_t)dst.rows };
+    return k.run(2, globalsize, NULL, false);
+}
+
 }
 
 void cv::scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray _dst )
 {
-    Mat src1 = _src1.getMat(), src2 = _src2.getMat();
-    int depth = src1.depth(), cn = src1.channels();
+    int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
+    CV_Assert( type == _src2.type() );
+
+    // OpenCL is attempted only for at most 2-dimensional sources with a UMat destination
+    if (ocl::useOpenCL() && _src1.dims() <= 2 && _src2.dims() <= 2 && _dst.isUMat() &&
+            ocl_scaleAdd(_src1, alpha, _src2, _dst, type))
+        return;
 
-    CV_Assert( src1.type() == src2.type() );
     if( depth < CV_32F )
     {
         addWeighted(_src1, alpha, _src2, 1, 0, _dst, depth);
         return;
     }
 
+    Mat src1 = _src1.getMat(), src2 = _src2.getMat();
+    CV_Assert(src1.size == src2.size);
+
     _dst.create(src1.dims, src1.size, src1.type());
     Mat dst = _dst.getMat();
 
diff --git a/modules/core/src/opencl/arithm.cl b/modules/core/src/opencl/arithm.cl
index add4b0695..605fe4785 100644
--- a/modules/core/src/opencl/arithm.cl
+++ b/modules/core/src/opencl/arithm.cl
@@ -91,6 +91,10 @@
 
 #else
 
+    // a build that defines only convertToWT1 uses the same conversion for the second source
+    #ifndef convertToWT2
+    #define convertToWT2 convertToWT1
+    #endif
 #define srcelem1 convertToWT1(*(__global srcT1*)(srcptr1 + src1_index))
 #define srcelem2 convertToWT2(*(__global srcT2*)(srcptr2 + src2_index))
 
@@ -230,6 +234,12 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v)
     workT value = srcelem1 * alpha + beta; \
     dstelem = convertToDT(value >= 0 ? value : -value)
 
+#elif defined OP_SCALE_ADD
+// dst = src1 * alpha + src2, evaluated in workT and passed through convertToDT
+#undef EXTRA_PARAMS
+#define EXTRA_PARAMS , workT alpha
+#define PROCESS_ELEM dstelem = convertToDT(srcelem1 * alpha + srcelem2)
+
 #elif defined OP_CTP_AD || defined OP_CTP_AR
 #ifdef OP_CTP_AD
 #define TO_DEGREE cartToPolar *= (180 / CV_PI);
diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp
index df692b818..f2b987514 100644
--- a/modules/core/test/ocl/test_arithm.cpp
+++ b/modules/core/test/ocl/test_arithm.cpp
@@ -1323,7 +1323,6 @@ OCL_TEST_P(InRange, Scalar)
     }
 }
 
-
 //////////////////////////////// ConvertScaleAbs ////////////////////////////////////////////////
 
 typedef ArithmTestBase ConvertScaleAbs;
@@ -1341,6 +1340,23 @@ OCL_TEST_P(ConvertScaleAbs, Mat)
     }
 }
 
+//////////////////////////////// ScaleAdd ////////////////////////////////////////////////
+
+typedef ArithmTestBase ScaleAdd;
+
+OCL_TEST_P(ScaleAdd, Mat)
+{
+    for (int j = 0; j < test_loop_times; j++)
+    {
+        generateTestData();
+
+        OCL_OFF(cv::scaleAdd(src1_roi, val[0], src2_roi, dst1_roi));
+        OCL_ON(cv::scaleAdd(usrc1_roi, val[0], usrc2_roi, udst1_roi));
+
+        Near(depth <= CV_32S ? 1 : 1e-6);
+    }
+}
+
 //////////////////////////////////////// Instantiation /////////////////////////////////////////
 
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(::testing::Values(CV_8U, CV_8S), OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool()));
@@ -1378,6 +1394,7 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, Sqrt, Combine(::testing::Values(CV_32F, CV_6
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Normalize, Combine(OCL_ALL_DEPTHS, Values(Channels(1)), Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, InRange, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, ConvertScaleAbs, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
+OCL_INSTANTIATE_TEST_CASE_P(Arithm, ScaleAdd, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
 
 } } // namespace cvtest::ocl
 