From 98b72ff389737ab7c99a1b7b18b4eef19195b29e Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Mon, 3 Feb 2014 17:38:28 +0400 Subject: [PATCH] optimized cv::pow for integer power --- modules/core/src/mathfuncs.cpp | 54 +++++++++++++++++---------- modules/core/src/opencl/arithm.cl | 7 +++- modules/core/test/ocl/test_arithm.cpp | 16 ++++---- 3 files changed, 49 insertions(+), 28 deletions(-) diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp index b2a080a73..f81e83553 100644 --- a/modules/core/src/mathfuncs.cpp +++ b/modules/core/src/mathfuncs.cpp @@ -2033,17 +2033,17 @@ static IPowFunc ipowTab[] = #ifdef HAVE_OPENCL -static bool ocl_pow(InputArray _src, double power, OutputArray _dst) +static bool ocl_pow(InputArray _src, double power, OutputArray _dst, + bool is_ipower, int ipower) { int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; - if ( !(_src.dims() <= 2 && (depth == CV_32F || depth == CV_64F)) || - (depth == CV_64F && !doubleSupport) ) + if (depth == CV_64F && !doubleSupport) return false; bool issqrt = std::abs(power - 0.5) < DBL_EPSILON; - const char * const op = issqrt ? "OP_SQRT" : "OP_POW"; + const char * const op = issqrt ? "OP_SQRT" : is_ipower ? "OP_POWN" : "OP_POW"; ocl::Kernel k("KF", ocl::core::arithm_oclsrc, format("-D dstT=%s -D %s -D UNARY_OP%s", ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), @@ -2060,6 +2060,8 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst) if (issqrt) k.args(srcarg, dstarg); + else if (is_ipower) + k.args(srcarg, dstarg, ipower); else { if (depth == CV_32F) @@ -2076,39 +2078,35 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst) void pow( InputArray _src, double power, OutputArray _dst ) { - CV_OCL_RUN(_dst.isUMat(), - ocl_pow(_src, power, _dst)) - - Mat src = _src.getMat(); - int type = src.type(), depth = src.depth(), cn = src.channels(); - - _dst.create( src.dims, src.size, type ); - Mat dst = _dst.getMat(); - - int ipower = cvRound(power); - bool is_ipower = false; + bool is_ipower = false, same = false; + int type = _src.type(), depth = CV_MAT_DEPTH(type), + cn = CV_MAT_CN(type), ipower = cvRound(power); if( fabs(ipower - power) < DBL_EPSILON ) { if( ipower < 0 ) { - divide( 1., src, dst ); + divide( 1., _src, _dst ); if( ipower == -1 ) return; ipower = -ipower; - src = dst; + same = true; } switch( ipower ) { case 0: - dst = Scalar::all(1); + _dst.createSameSize(_src, type); + _dst.setTo(Scalar::all(1)); return; case 1: - src.copyTo(dst); + _src.copyTo(_dst); return; case 2: - multiply(src, src, dst); + if (same) + multiply(_dst, _dst, _dst); + else + multiply(_src, _src, _dst); return; default: is_ipower = true; @@ -2117,6 +2115,22 @@ void pow( InputArray _src, double power, OutputArray _dst ) else CV_Assert( depth == CV_32F || depth == CV_64F ); + CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2, + ocl_pow(same ? _dst : _src, power, _dst, is_ipower, ipower)) + + Mat src, dst; + if (same) + { + dst = _dst.getMat(); + src = dst; + } + else + { + src = _src.getMat(); + _dst.create( src.dims, src.size, type ); + dst = _dst.getMat(); + } + const Mat* arrays[] = {&src, &dst, 0}; uchar* ptrs[2]; NAryMatIterator it(arrays, ptrs); diff --git a/modules/core/src/opencl/arithm.cl b/modules/core/src/opencl/arithm.cl index 5b7373553..ed0838416 100644 --- a/modules/core/src/opencl/arithm.cl +++ b/modules/core/src/opencl/arithm.cl @@ -211,6 +211,11 @@ #elif defined OP_POW #define PROCESS_ELEM dstelem = pow(srcelem1, srcelem2) +#elif defined OP_POWN +#undef workT +#define workT int +#define PROCESS_ELEM dstelem = pown(srcelem1, srcelem2) + #elif defined OP_SQRT #define PROCESS_ELEM dstelem = sqrt(srcelem1) @@ -293,7 +298,7 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v) #if defined OP_AND || defined OP_OR || defined OP_XOR || defined OP_ADD || defined OP_SAT_ADD || \ defined OP_SUB || defined OP_SAT_SUB || defined OP_RSUB || defined OP_SAT_RSUB || \ defined OP_ABSDIFF || defined OP_CMP || defined OP_MIN || defined OP_MAX || defined OP_POW || \ - defined OP_MUL || defined OP_DIV + defined OP_MUL || defined OP_DIV || defined OP_POWN #undef EXTRA_PARAMS #define EXTRA_PARAMS , workT srcelem2 #endif diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp index 2932fc82c..bf29c4cc9 100644 --- a/modules/core/test/ocl/test_arithm.cpp +++ b/modules/core/test/ocl/test_arithm.cpp @@ -773,16 +773,18 @@ typedef ArithmTestBase Pow; OCL_TEST_P(Pow, Mat) { + static const double pows[] = { -4, -1, -2.5, 0, 1, 2, 3.7, 4 }; + for (int j = 0; j < test_loop_times; j++) - { - generateTestData(); - double p = 4.5; + for (int k = 0, size = sizeof(pows) / sizeof(double); k < size; ++k) + { + generateTestData(); - OCL_OFF(cv::pow(src1_roi, p, dst1_roi)); - OCL_ON(cv::pow(usrc1_roi, p, udst1_roi)); + OCL_OFF(cv::pow(src1_roi, pows[k], dst1_roi)); + OCL_ON(cv::pow(usrc1_roi, pows[k], udst1_roi)); - Near(1); - } + Near(1); + } } //////////////////////////////// AddWeighted /////////////////////////////////////////////////