diff --git a/modules/core/doc/operations_on_arrays.rst b/modules/core/doc/operations_on_arrays.rst
index c2121fc6f..3f85eab30 100644
--- a/modules/core/doc/operations_on_arrays.rst
+++ b/modules/core/doc/operations_on_arrays.rst
@@ -2065,7 +2065,7 @@ normalize
 ---------
 Normalizes the norm or value range of an array.
 
-.. ocv:function:: void normalize( InputArray src, OutputArray dst, double alpha=1, double beta=0, int norm_type=NORM_L2, int dtype=-1, InputArray mask=noArray() )
+.. ocv:function:: void normalize( InputArray src, InputOutputArray dst, double alpha=1, double beta=0, int norm_type=NORM_L2, int dtype=-1, InputArray mask=noArray() )
 
 .. ocv:function:: void normalize(const SparseMat& src, SparseMat& dst, double alpha, int normType)
 
diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp
index 9c409482c..ed15594ae 100644
--- a/modules/core/include/opencv2/core.hpp
+++ b/modules/core/include/opencv2/core.hpp
@@ -240,7 +240,7 @@ CV_EXPORTS_W void batchDistance(InputArray src1, InputArray src2,
                                 bool crosscheck = false);
 
 //! scales and shifts array elements so that either the specified norm (alpha) or the minimum (alpha) and maximum (beta) array values get the specified values
-CV_EXPORTS_W void normalize( InputArray src, OutputArray dst, double alpha = 1, double beta = 0,
+CV_EXPORTS_W void normalize( InputArray src, InputOutputArray dst, double alpha = 1, double beta = 0,
                              int norm_type = NORM_L2, int dtype = -1, InputArray mask = noArray());
 
 //! scales and shifts array elements so that either the specified norm (alpha) or the minimum (alpha) and maximum (beta) array values get the specified values
diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp
index d921f7565..d399265b0 100644
--- a/modules/core/include/opencv2/core/mat.hpp
+++ b/modules/core/include/opencv2/core/mat.hpp
@@ -131,6 +131,8 @@ public:
     virtual bool isSubmatrix(int i=-1) const;
     virtual bool empty() const;
     virtual void copyTo(const _OutputArray& arr) const;
+    //! same as copyTo(arr), but forwards mask to the copyTo of the wrapped Mat/UMat
+    virtual void copyTo(const _OutputArray& arr, const _InputArray & mask) const;
     virtual size_t offset(int i=-1) const;
     virtual size_t step(int i=-1) const;
     bool isMat() const;
diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp
index 01a792cb9..4848489a0 100644
--- a/modules/core/src/convert.cpp
+++ b/modules/core/src/convert.cpp
@@ -1842,18 +1842,101 @@ namespace cv {
 
 #ifdef HAVE_OPENCL
 
-static bool ocl_normalize( InputArray _src, OutputArray _dst, InputArray _mask, int rtype,
-                           double scale, double shift )
+// OpenCL path computing dst = src * scale + delta for the elements selected by _mask
+// (for all elements when the mask is empty).
+//   _dst  - in/out array: the masked kernel writes only where the mask byte is non-zero
+//   dtype - destination depth/type, forwarded to convertTo or used to specialise the kernel
+// Returns false when 64F data is involved but the device lacks fp64 support, when the
+// kernel cannot be built, or when launching it fails; returns true otherwise.
+static bool ocl_normalize( InputArray _src, InputOutputArray _dst, InputArray _mask, int dtype,
+                           double scale, double delta )
 {
-    UMat src = _src.getUMat(), dst = _dst.getUMat();
+    UMat src = _src.getUMat();
 
     if( _mask.empty() )
-        src.convertTo( dst, rtype, scale, shift );
+        src.convertTo( _dst, dtype, scale, delta );
+    else if (src.channels() <= 4)
+    {
+        const ocl::Device & dev = ocl::Device::getDefault();
+
+        int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype),
+                ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32F, std::max(sdepth, ddepth)),
+                rowsPerWI = dev.isIntel() ? 4 : 1;
+        // NOTE(review): dtype may carry only a depth (to be confirmed against the caller);
+        // rebuild the full type so the stype comparison and dstT below are channel-aware
+        dtype = CV_MAKE_TYPE(ddepth, cn);
+
+        float fscale = static_cast<float>(scale), fdelta = static_cast<float>(delta);
+        bool haveScale = std::fabs(scale - 1) > DBL_EPSILON,
+                haveZeroScale = !(std::fabs(scale) > DBL_EPSILON),
+                haveDelta = std::fabs(delta) > DBL_EPSILON,
+                doubleSupport = dev.doubleFPConfig() > 0;
+
+        // identity transform into the same type: a masked copy is all that is needed
+        if (!haveScale && !haveDelta && stype == dtype)
+        {
+            _src.copyTo(_dst, _mask);
+            return true;
+        }
+        // scale == 0: every selected element becomes delta, on every channel
+        if (haveZeroScale)
+        {
+            _dst.setTo(Scalar::all(delta), _mask);
+            return true;
+        }
+
+        // 64-bit floating point data cannot be processed without device fp64 support
+        if ((sdepth == CV_64F || ddepth == CV_64F) && !doubleSupport)
+            return false;
+
+        // the kernel is specialised at build time through these -D definitions
+        char cvt[2][40];
+        String opts = format("-D srcT=%s -D dstT=%s -D convertToWT=%s -D cn=%d -D rowsPerWI=%d"
+                             " -D convertToDT=%s -D workT=%s%s%s%s -D srcT1=%s -D dstT1=%s",
+                             ocl::typeToStr(stype), ocl::typeToStr(dtype),
+                             ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), cn,
+                             rowsPerWI, ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]),
+                             ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)),
+                             doubleSupport ? " -D DOUBLE_SUPPORT" : "",
+                             haveScale ? " -D HAVE_SCALE" : "",
+                             haveDelta ? " -D HAVE_DELTA" : "",
+                             ocl::typeToStr(sdepth), ocl::typeToStr(ddepth));
+
+        ocl::Kernel k("normalizek", ocl::core::normalize_oclsrc, opts);
+        if (k.empty())
+            return false;
+
+        UMat mask = _mask.getUMat(), dst = _dst.getUMat();
+
+        ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
+                maskarg = ocl::KernelArg::ReadOnlyNoSize(mask),
+                dstarg = ocl::KernelArg::ReadWrite(dst);
+
+        // the argument list has to mirror the HAVE_SCALE / HAVE_DELTA kernel signature
+        if (haveScale)
+        {
+            if (haveDelta)
+                k.args(srcarg, maskarg, dstarg, fscale, fdelta);
+            else
+                k.args(srcarg, maskarg, dstarg, fscale);
+        }
+        else
+        {
+            if (haveDelta)
+                k.args(srcarg, maskarg, dstarg, fdelta);
+            else
+                k.args(srcarg, maskarg, dstarg);
+        }
+
+        // one work-item per column, each covering rowsPerWI consecutive rows
+        size_t globalsize[2] = { (size_t)src.cols, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI };
+        return k.run(2, globalsize, NULL, false);
+    }
     else
     {
         UMat temp;
-        src.convertTo( temp, rtype, scale, shift );
-        temp.copyTo( dst, _mask );
+        src.convertTo( temp, dtype, scale, delta );
+        temp.copyTo( _dst, _mask );
     }
 
     return true;
@@ -1863,7 +1946,7 @@ static bool ocl_normalize( InputArray _src, OutputArray _dst, InputArray _mask,
 
 }
 
-void cv::normalize( InputArray _src, OutputArray _dst, double a, double b,
+void cv::normalize( InputArray _src, InputOutputArray _dst, double a, double b,
                     int norm_type, int rtype, InputArray _mask )
 {
     double scale = 1, shift = 0;
diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp
index 0bb62c7df..653efe63a 100644
--- a/modules/core/src/matrix.cpp
+++ b/modules/core/src/matrix.cpp
@@ -2051,6 +2051,26 @@ void _InputArray::copyTo(const _OutputArray& arr) const
         CV_Error(Error::StsNotImplemented, "");
 }
 
+// Masked counterpart of copyTo(arr): forwards to the copyTo(arr, mask) of the wrapped
+// Mat (MAT, MATX and STD_VECTOR go through getMat()) or UMat. A NONE input releases arr;
+// any other kind raises Error::StsNotImplemented.
+void _InputArray::copyTo(const _OutputArray& arr, const _InputArray & mask) const
+{
+    int k = kind();
+
+    if( k == NONE )
+        arr.release();
+    else if( k == MAT || k == MATX || k == STD_VECTOR )
+    {
+        Mat m = getMat();
+        m.copyTo(arr, mask);
+    }
+    else if( k == UMAT )
+        ((UMat*)obj)->copyTo(arr, mask);
+    else
+        CV_Error(Error::StsNotImplemented, "");
+}
+
 bool _OutputArray::fixedSize() const
 {
     return (flags & FIXED_SIZE) == FIXED_SIZE;
diff --git a/modules/core/src/opencl/normalize.cl b/modules/core/src/opencl/normalize.cl
new file mode 100644
index 000000000..6582e5555
--- /dev/null
+++ b/modules/core/src/opencl/normalize.cl
@@ -0,0 +1,77 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2014, Itseez, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
+#endif
+
+#define noconvert
+
+// 3-channel pixels are accessed with vload3/vstore3 on the scalar element type (srcT1/dstT1)
+#if cn != 3
+#define loadpix(addr) *(__global const srcT *)(addr)
+#define storepix(val, addr) *(__global dstT *)(addr) = val
+#define srcTSIZE (int)sizeof(srcT)
+#define dstTSIZE (int)sizeof(dstT)
+#else
+#define loadpix(addr) vload3(0, (__global const srcT1 *)(addr))
+#define storepix(val, addr) vstore3(val, 0, (__global dstT1 *)(addr))
+#define srcTSIZE ((int)sizeof(srcT1)*3)
+#define dstTSIZE ((int)sizeof(dstT1)*3)
+#endif
+
+// dst(x, y) = convertToDT(convertToWT(src(x, y)) * scale + delta) where mask(x, y) != 0;
+// destination elements whose mask byte is zero are not written.
+// Each work-item handles column x for up to rowsPerWI consecutive rows starting at y0.
+// scale / delta belong to the signature only when HAVE_SCALE / HAVE_DELTA are defined.
+__kernel void normalizek(__global const uchar * srcptr, int src_step, int src_offset,
+                         __global const uchar * mask, int mask_step, int mask_offset,
+                         __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols
+#ifdef HAVE_SCALE
+                         , float scale
+#endif
+#ifdef HAVE_DELTA
+                         , float delta
+#endif
+                         )
+{
+    int x = get_global_id(0);
+    int y0 = get_global_id(1) * rowsPerWI;
+
+    if (x < dst_cols)
+    {
+        int src_index = mad24(y0, src_step, mad24(x, srcTSIZE, src_offset));
+        int mask_index = mad24(y0, mask_step, x + mask_offset);
+        int dst_index = mad24(y0, dst_step, mad24(x, dstTSIZE, dst_offset));
+
+        for (int y = y0, y1 = min(y0 + rowsPerWI, dst_rows); y < y1;
+            ++y, src_index += src_step, dst_index += dst_step, mask_index += mask_step)
+        {
+            if (mask[mask_index])
+            {
+                workT value = convertToWT(loadpix(srcptr + src_index));
+#ifdef HAVE_SCALE
+#ifdef HAVE_DELTA
+                value = fma(value, (workT)(scale), (workT)(delta));
+#else
+                value *= (workT)(scale);
+#endif
+#else // not scale
+#ifdef HAVE_DELTA
+                value += (workT)(delta);
+#endif
+#endif
+
+                storepix(convertToDT(value), dstptr + dst_index);
+            }
+        }
+    }
+}