From c80faff42f88eb82b27f54f0365099f286824623 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 30 Apr 2014 14:59:37 +0400 Subject: [PATCH] added cn>1 support to cv::norm (NORM_INF) --- modules/core/src/opencl/reduce.cl | 85 +++++++++++++++---------- modules/core/src/stat.cpp | 89 ++++++++++++++++++--------- modules/core/test/ocl/test_arithm.cpp | 5 +- 3 files changed, 117 insertions(+), 62 deletions(-) diff --git a/modules/core/src/opencl/reduce.cl b/modules/core/src/opencl/reduce.cl index ed935881d..6b4ccddeb 100644 --- a/modules/core/src/opencl/reduce.cl +++ b/modules/core/src/opencl/reduce.cl @@ -50,6 +50,36 @@ #endif #endif +#if defined OP_NORM_INF_MASK || defined OP_MIN_MAX_LOC || defined OP_MIN_MAX_LOC_MASK + +#ifdef DEPTH_0 +#define MIN_VAL 0 +#define MAX_VAL 255 +#elif defined DEPTH_1 +#define MIN_VAL -128 +#define MAX_VAL 127 +#elif defined DEPTH_2 +#define MIN_VAL 0 +#define MAX_VAL 65535 +#elif defined DEPTH_3 +#define MIN_VAL -32768 +#define MAX_VAL 32767 +#elif defined DEPTH_4 +#define MIN_VAL INT_MIN +#define MAX_VAL INT_MAX +#elif defined DEPTH_5 +#define MIN_VAL (-FLT_MAX) +#define MAX_VAL FLT_MAX +#elif defined DEPTH_6 +#define MIN_VAL (-DBL_MAX) +#define MAX_VAL DBL_MAX +#endif + +#define dstT srcT +#define dstT1 srcT1 + +#endif // min/max stuff + #define noconvert #if cn != 3 @@ -145,41 +175,32 @@ #define CALC_RESULT \ storepix(localmem[0], dstptr + dstTSIZE * gid) +// norm (NORM_INF) with cn > 1 and mask +#elif defined OP_NORM_INF_MASK + +#define DECLARE_LOCAL_MEM \ + __local srcT localmem_max[WGS2_ALIGNED] +#define DEFINE_ACCUMULATOR \ + srcT maxval = MIN_VAL, temp +#define REDUCE_GLOBAL \ + int mask_index = mad24(id / cols, mask_step, mask_offset + (id % cols)); \ + if (mask[mask_index]) \ + { \ + temp = loadpix(srcptr + src_index); \ + maxval = max(maxval, (srcT)(temp >= 0 ? temp : -temp)); \ + } +#define SET_LOCAL_1 \ + localmem_max[lid] = maxval +#define REDUCE_LOCAL_1 \ + localmem_max[lid - WGS2_ALIGNED] = max(maxval, localmem_max[lid - WGS2_ALIGNED]) +#define REDUCE_LOCAL_2 \ + localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]) +#define CALC_RESULT \ + storepix(localmem_max[0], dstptr + dstTSIZE * gid) + // minMaxLoc stuff #elif defined OP_MIN_MAX_LOC || defined OP_MIN_MAX_LOC_MASK -#ifdef DEPTH_0 -#define srcT uchar -#define MIN_VAL 0 -#define MAX_VAL 255 -#elif defined DEPTH_1 -#define srcT char -#define MIN_VAL -128 -#define MAX_VAL 127 -#elif defined DEPTH_2 -#define srcT ushort -#define MIN_VAL 0 -#define MAX_VAL 65535 -#elif defined DEPTH_3 -#define srcT short -#define MIN_VAL -32768 -#define MAX_VAL 32767 -#elif defined DEPTH_4 -#define srcT int -#define MIN_VAL INT_MIN -#define MAX_VAL INT_MAX -#elif defined DEPTH_5 -#define srcT float -#define MIN_VAL (-FLT_MAX) -#define MAX_VAL FLT_MAX -#elif defined DEPTH_6 -#define srcT double -#define MIN_VAL (-DBL_MAX) -#define MAX_VAL DBL_MAX -#endif - -#define dstT srcT - #define DECLARE_LOCAL_MEM \ __local srcT localmem_min[WGS2_ALIGNED]; \ __local srcT localmem_max[WGS2_ALIGNED]; \ diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index 8c57bbe3f..d60a6836d 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -41,10 +41,11 @@ //M*/ #include "precomp.hpp" -#include "opencl_kernels.hpp" #include #include +#include "opencl_kernels.hpp" + namespace cv { @@ -1245,7 +1246,7 @@ void getMinMaxRes(const Mat &minv, const Mat &maxv, const Mat &minl, const Mat & T min = std::numeric_limits::max(); T max = std::numeric_limits::min() > 0 ? -std::numeric_limits::max() : std::numeric_limits::min(); int minloc = INT_MAX, maxloc = INT_MAX; - for( int i = 0; i < groupnum; i++) + for (int i = 0; i < groupnum; i++) { T current_min = minv.at(0,i); T current_max = maxv.at(0,i); @@ -1262,16 +1263,16 @@ void getMinMaxRes(const Mat &minv, const Mat &maxv, const Mat &minl, const Mat & } } bool zero_mask = (maxloc == INT_MAX) || (minloc == INT_MAX); - if(minVal) + if (minVal) *minVal = zero_mask ? 0 : (double)min; - if(maxVal) + if (maxVal) *maxVal = zero_mask ? 0 : (double)max; - if(minLoc) + if (minLoc) { minLoc[0] = zero_mask ? -1 : minloc/cols; minLoc[1] = zero_mask ? -1 : minloc%cols; } - if(maxLoc) + if (maxLoc) { maxLoc[0] = zero_mask ? -1 : maxloc/cols; maxLoc[1] = zero_mask ? -1 : maxloc%cols; @@ -1300,8 +1301,9 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* wgs2_aligned <<= 1; wgs2_aligned >>= 1; - String opts = format("-D DEPTH_%d -D OP_MIN_MAX_LOC%s -D WGS=%d -D WGS2_ALIGNED=%d%s", - depth, _mask.empty() ? "" : "_MASK", (int)wgs, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : ""); + String opts = format("-D DEPTH_%d -D srcT=%s -D OP_MIN_MAX_LOC%s -D WGS=%d -D WGS2_ALIGNED=%d%s", + depth, ocl::typeToStr(depth), _mask.empty() ? "" : "_MASK", (int)wgs, + wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : ""); ocl::Kernel k("reduce", ocl::core::reduce_oclsrc, opts); if (k.empty()) @@ -1980,39 +1982,70 @@ static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double & haveMask = _mask.kind() != _InputArray::NONE; if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) || - (!doubleSupport && depth == CV_64F) || (normType == NORM_INF && haveMask && cn != 1)) + (!doubleSupport && depth == CV_64F)) return false; UMat src = _src.getUMat(); if (normType == NORM_INF) { - UMat abssrc; - - if (depth != CV_8U && depth != CV_16U) + if (cn == 1 || !haveMask) { - int wdepth = std::max(CV_32S, depth); - char cvt[50]; + UMat abssrc; - ocl::Kernel kabs("KF", ocl::core::arithm_oclsrc, - format("-D UNARY_OP -D OP_ABS_NOSAT -D dstT=%s -D srcT1=%s -D convertToDT=%s%s", - ocl::typeToStr(wdepth), ocl::typeToStr(depth), - ocl::convertTypeStr(depth, wdepth, 1, cvt), - doubleSupport ? " -D DOUBLE_SUPPORT" : "")); - if (kabs.empty()) - return false; + if (depth != CV_8U && depth != CV_16U) + { + int wdepth = std::max(CV_32S, depth); + char cvt[50]; - abssrc.create(src.size(), CV_MAKE_TYPE(wdepth, cn)); - kabs.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(abssrc, cn)); + ocl::Kernel kabs("KF", ocl::core::arithm_oclsrc, + format("-D UNARY_OP -D OP_ABS_NOSAT -D dstT=%s -D srcT1=%s -D convertToDT=%s%s", + ocl::typeToStr(wdepth), ocl::typeToStr(depth), + ocl::convertTypeStr(depth, wdepth, 1, cvt), + doubleSupport ? " -D DOUBLE_SUPPORT" : "")); + if (kabs.empty()) + return false; - size_t globalsize[2] = { src.cols * cn, src.rows }; - if (!kabs.run(2, globalsize, NULL, false)) - return false; + abssrc.create(src.size(), CV_MAKE_TYPE(wdepth, cn)); + kabs.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(abssrc, cn)); + + size_t globalsize[2] = { src.cols * cn, src.rows }; + if (!kabs.run(2, globalsize, NULL, false)) + return false; + } + else + abssrc = src; + + cv::minMaxIdx(haveMask ? abssrc : abssrc.reshape(1), NULL, &result, NULL, NULL, _mask); } else - abssrc = src; + { + int dbsize = ocl::Device::getDefault().maxComputeUnits(); + size_t wgs = ocl::Device::getDefault().maxWorkGroupSize(); - cv::minMaxIdx(haveMask ? abssrc : abssrc.reshape(1), NULL, &result, NULL, NULL, _mask); + int wgs2_aligned = 1; + while (wgs2_aligned < (int)wgs) + wgs2_aligned <<= 1; + wgs2_aligned >>= 1; + + ocl::Kernel k("reduce", ocl::core::reduce_oclsrc, + format("-D OP_NORM_INF_MASK -D HAVE_MASK -D DEPTH_%d" + " -D srcT=%s -D srcT1=%s -D WGS=%d -D cn=%d -D WGS2_ALIGNED=%d%s", + depth, ocl::typeToStr(type), ocl::typeToStr(depth), + wgs, cn, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "")); + if (k.empty()) + return false; + + UMat db(1, dbsize, type), mask = _mask.getUMat(); + k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(), + dbsize, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(mask)); + + size_t globalsize = dbsize * wgs; + if (!k.run(1, &globalsize, &wgs, true)) + return false; + + minMaxIdx(db.getMat(ACCESS_READ), NULL, &result, NULL, NULL, noArray()); + } } else if (normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) { diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp index d2b26e146..d39697584 100644 --- a/modules/core/test/ocl/test_arithm.cpp +++ b/modules/core/test/ocl/test_arithm.cpp @@ -1149,7 +1149,7 @@ OCL_TEST_P(MinMaxIdx, Mat) int p1[2], p2[2], up1[2], up2[2]; double minv, maxv, uminv, umaxv; - if(src1_roi.channels() > 1) + if (cn > 1) { OCL_OFF(cv::minMaxIdx(src2_roi, &minv, &maxv) ); OCL_ON(cv::minMaxIdx(usrc2_roi, &uminv, &umaxv)); @@ -1164,7 +1164,8 @@ OCL_TEST_P(MinMaxIdx, Mat) EXPECT_DOUBLE_EQ(minv, uminv); EXPECT_DOUBLE_EQ(maxv, umaxv); - for( int i = 0; i < 2; i++) + + for (int i = 0; i < 2; i++) { EXPECT_EQ(p1[i], up1[i]); EXPECT_EQ(p2[i], up2[i]);