diff --git a/modules/core/include/opencv2/core/base.hpp b/modules/core/include/opencv2/core/base.hpp index f2acaa3fb..73beb911f 100644 --- a/modules/core/include/opencv2/core/base.hpp +++ b/modules/core/include/opencv2/core/base.hpp @@ -442,6 +442,10 @@ template static inline _Tp saturate_cast(int v) { return _Tp( template static inline _Tp saturate_cast(float v) { return _Tp(v); } /** @overload */ template static inline _Tp saturate_cast(double v) { return _Tp(v); } +/** @overload */ +template static inline _Tp saturate_cast(int64 v) { return _Tp(v); } +/** @overload */ +template static inline _Tp saturate_cast(uint64 v) { return _Tp(v); } //! @cond IGNORED @@ -452,6 +456,8 @@ template<> inline uchar saturate_cast(short v) { return saturate_c template<> inline uchar saturate_cast(unsigned v) { return (uchar)std::min(v, (unsigned)UCHAR_MAX); } template<> inline uchar saturate_cast(float v) { int iv = cvRound(v); return saturate_cast(iv); } template<> inline uchar saturate_cast(double v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline uchar saturate_cast(int64 v) { return (uchar)((uint64)v <= (uint64)UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); } +template<> inline uchar saturate_cast(uint64 v) { return (uchar)std::min(v, (uint64)UCHAR_MAX); } template<> inline schar saturate_cast(uchar v) { return (schar)std::min((int)v, SCHAR_MAX); } template<> inline schar saturate_cast(ushort v) { return (schar)std::min((unsigned)v, (unsigned)SCHAR_MAX); } @@ -460,6 +466,8 @@ template<> inline schar saturate_cast(short v) { return saturate_c template<> inline schar saturate_cast(unsigned v) { return (schar)std::min(v, (unsigned)SCHAR_MAX); } template<> inline schar saturate_cast(float v) { int iv = cvRound(v); return saturate_cast(iv); } template<> inline schar saturate_cast(double v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline schar saturate_cast(int64 v) { return (schar)((uint64)((int64)v-SCHAR_MIN) <= (uint64)UCHAR_MAX ? v : v > 0 ? 
SCHAR_MAX : SCHAR_MIN); } +template<> inline schar saturate_cast(uint64 v) { return (schar)std::min(v, (uint64)SCHAR_MAX); } template<> inline ushort saturate_cast(schar v) { return (ushort)std::max((int)v, 0); } template<> inline ushort saturate_cast(short v) { return (ushort)std::max((int)v, 0); } @@ -467,12 +475,16 @@ template<> inline ushort saturate_cast(int v) { return (ushort)(( template<> inline ushort saturate_cast(unsigned v) { return (ushort)std::min(v, (unsigned)USHRT_MAX); } template<> inline ushort saturate_cast(float v) { int iv = cvRound(v); return saturate_cast(iv); } template<> inline ushort saturate_cast(double v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline ushort saturate_cast(int64 v) { return (ushort)((uint64)v <= (uint64)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); } +template<> inline ushort saturate_cast(uint64 v) { return (ushort)std::min(v, (uint64)USHRT_MAX); } template<> inline short saturate_cast(ushort v) { return (short)std::min((int)v, SHRT_MAX); } template<> inline short saturate_cast(int v) { return (short)((unsigned)(v - SHRT_MIN) <= (unsigned)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); } template<> inline short saturate_cast(unsigned v) { return (short)std::min(v, (unsigned)SHRT_MAX); } template<> inline short saturate_cast(float v) { int iv = cvRound(v); return saturate_cast(iv); } template<> inline short saturate_cast(double v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline short saturate_cast(int64 v) { return (short)((uint64)((int64)v - SHRT_MIN) <= (uint64)USHRT_MAX ? v : v > 0 ? 
SHRT_MAX : SHRT_MIN); } +template<> inline short saturate_cast(uint64 v) { return (short)std::min(v, (uint64)SHRT_MAX); } template<> inline int saturate_cast(float v) { return cvRound(v); } template<> inline int saturate_cast(double v) { return cvRound(v); } diff --git a/modules/photo/include/opencv2/photo.hpp b/modules/photo/include/opencv2/photo.hpp index 2d1087e89..c651b9ee3 100644 --- a/modules/photo/include/opencv2/photo.hpp +++ b/modules/photo/include/opencv2/photo.hpp @@ -119,7 +119,7 @@ CV_EXPORTS_W void inpaint( InputArray src, InputArray inpaintMask, with several computational optimizations. Noise expected to be a gaussian white noise -@param src Input 8-bit 1-channel, 2-channel or 3-channel image. +@param src Input 8-bit 1-channel, 2-channel, 3-channel or 4-channel image. @param dst Output image with the same size and type as src . @param templateWindowSize Size in pixels of the template patch that is used to compute weights. Should be odd. Recommended value 7 pixels @@ -138,6 +138,35 @@ parameter. CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst, float h = 3, int templateWindowSize = 7, int searchWindowSize = 21); +/** @brief Perform image denoising using Non-local Means Denoising algorithm + with several computational +optimizations. Noise expected to be a gaussian white noise + +@param src Input 8-bit or 16-bit (only with NORM_L1) 1-channel, +2-channel, 3-channel or 4-channel image. +@param dst Output image with the same size and type as src . +@param templateWindowSize Size in pixels of the template patch that is used to compute weights. +Should be odd. Recommended value 7 pixels +@param searchWindowSize Size in pixels of the window that is used to compute weighted average for +given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater +denoising time. 
Recommended value 21 pixels +@param h Array of parameters regulating filter strength, either one +parameter applied to all channels or one per channel in dst. Big h value +perfectly removes noise but also removes image details, smaller h +value preserves details but also preserves some noise +@param normType Type of norm used for weight calculation. Can be either NORM_L2 or NORM_L1 + +This function is expected to be applied to grayscale images. For colored images look at +fastNlMeansDenoisingColored. Advanced usage of this function can be manual denoising of colored +image in different colorspaces. Such approach is used in fastNlMeansDenoisingColored by converting +image to CIELAB colorspace and then separately denoise L and AB components with different h +parameter. + */ +CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst, + const std::vector& h, + int templateWindowSize = 7, int searchWindowSize = 21, + int normType = NORM_L2); + /** @brief Modification of fastNlMeansDenoising function for colored images @param src Input 8-bit 3-channel image. @@ -165,7 +194,35 @@ captured in small period of time. For example video. This version of the functio images or for manual manipulation with colorspaces. For more details see -@param srcImgs Input 8-bit 1-channel, 2-channel or 3-channel images sequence. All images should +@param srcImgs Input 8-bit 1-channel, 2-channel, 3-channel or +4-channel images sequence. All images should have the same type and +size. +@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence +@param temporalWindowSize Number of surrounding images to use for target image denoising. Should +be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to +imgToDenoiseIndex + temporalWindowSize / 2 from srcImgs will be used to denoise +srcImgs[imgToDenoiseIndex] image. +@param dst Output image with the same size and type as srcImgs images. 
+@param templateWindowSize Size in pixels of the template patch that is used to compute weights. +Should be odd. Recommended value 7 pixels +@param searchWindowSize Size in pixels of the window that is used to compute weighted average for +given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater +denoising time. Recommended value 21 pixels +@param h Parameter regulating filter strength. Bigger h value +perfectly removes noise but also removes image details, smaller h +value preserves details but also preserves some noise + */ +CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst, + int imgToDenoiseIndex, int temporalWindowSize, + float h = 3, int templateWindowSize = 7, int searchWindowSize = 21); + +/** @brief Modification of fastNlMeansDenoising function for images sequence where consecutive images have been +captured in small period of time. For example video. This version of the function is for grayscale +images or for manual manipulation with colorspaces. For more details see + + +@param srcImgs Input 8-bit or 16-bit (only with NORM_L1) 1-channel, +2-channel, 3-channel or 4-channel images sequence. All images should have the same type and size. @param imgToDenoiseIndex Target image to denoise index in srcImgs sequence @param temporalWindowSize Number of surrounding images to use for target image denoising. Should @@ -178,13 +235,17 @@ Should be odd. Recommended value 7 pixels @param searchWindowSize Size in pixels of the window that is used to compute weighted average for given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater denoising time. Recommended value 21 pixels -@param h Parameter regulating filter strength for luminance component. 
Bigger h value perfectly -removes noise but also removes image details, smaller h value preserves details but also preserves -some noise +@param h Array of parameters regulating filter strength, either one +parameter applied to all channels or one per channel in dst. Big h value +perfectly removes noise but also removes image details, smaller h +value preserves details but also preserves some noise +@param normType Type of norm used for weight calculation. Can be either NORM_L2 or NORM_L1 */ CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst, - int imgToDenoiseIndex, int temporalWindowSize, - float h = 3, int templateWindowSize = 7, int searchWindowSize = 21); + int imgToDenoiseIndex, int temporalWindowSize, + const std::vector& h, + int templateWindowSize = 7, int searchWindowSize = 21, + int normType = NORM_L2); /** @brief Modification of fastNlMeansDenoisingMulti function for colored images sequences diff --git a/modules/photo/src/denoising.cpp b/modules/photo/src/denoising.cpp index b4767a738..c68d09b92 100644 --- a/modules/photo/src/denoising.cpp +++ b/modules/photo/src/denoising.cpp @@ -45,42 +45,115 @@ #include "fast_nlmeans_multi_denoising_invoker.hpp" #include "fast_nlmeans_denoising_opencl.hpp" +template +static void fastNlMeansDenoising_( const Mat& src, Mat& dst, const std::vector& h, + int templateWindowSize, int searchWindowSize) +{ + int hn = (int)h.size(); + + switch (CV_MAT_CN(src.type())) { + case 1: + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, &h[0])); + break; + case 2: + if (hn == 1) + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker, IT, UIT, D, int>( + src, dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker, IT, UIT, D, Vec2i>( + src, dst, templateWindowSize, searchWindowSize, &h[0])); + break; + case 3: + if (hn == 1) + 
parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker, IT, UIT, D, int>( + src, dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker, IT, UIT, D, Vec3i>( + src, dst, templateWindowSize, searchWindowSize, &h[0])); + break; + case 4: + if (hn == 1) + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker, IT, UIT, D, int>( + src, dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker, IT, UIT, D, Vec4i>( + src, dst, templateWindowSize, searchWindowSize, &h[0])); + break; + default: + CV_Error(Error::StsBadArg, + "Unsupported number of channels! Only 1, 2, 3, and 4 are supported"); + } +} + void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h, int templateWindowSize, int searchWindowSize) { + fastNlMeansDenoising(_src, _dst, std::vector(1, h), + templateWindowSize, searchWindowSize); +} + +void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, const std::vector& h, + int templateWindowSize, int searchWindowSize, int normType) +{ + int hn = (int)h.size(), type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + CV_Assert(hn == 1 || hn == cn); + Size src_size = _src.size(); CV_OCL_RUN(_src.dims() <= 2 && (_src.isUMat() || _dst.isUMat()) && src_size.width > 5 && src_size.height > 5, // low accuracy on small sizes - ocl_fastNlMeansDenoising(_src, _dst, h, templateWindowSize, searchWindowSize)) + ocl_fastNlMeansDenoising(_src, _dst, &h[0], hn, + templateWindowSize, searchWindowSize, normType)) Mat src = _src.getMat(); _dst.create(src_size, src.type()); Mat dst = _dst.getMat(); + switch (normType) { + case NORM_L2: #ifdef HAVE_TEGRA_OPTIMIZATION - if(tegra::useTegra() && tegra::fastNlMeansDenoising(src, dst, h, templateWindowSize, searchWindowSize)) - return; + if(hn == 1 && tegra::useTegra() && + tegra::fastNlMeansDenoising(src, dst, h[0], 
templateWindowSize, searchWindowSize)) + return; #endif - - switch (src.type()) { - case CV_8U: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, h)); + switch (depth) { + case CV_8U: + fastNlMeansDenoising_(src, dst, h, + templateWindowSize, + searchWindowSize); + break; + default: + CV_Error(Error::StsBadArg, + "Unsupported depth! Only CV_8U is supported for NORM_L2"); + } break; - case CV_8UC2: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, h)); - break; - case CV_8UC3: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, h)); + case NORM_L1: + switch (depth) { + case CV_8U: + fastNlMeansDenoising_(src, dst, h, + templateWindowSize, + searchWindowSize); + break; + case CV_16U: + fastNlMeansDenoising_(src, dst, h, + templateWindowSize, + searchWindowSize); + break; + default: + CV_Error(Error::StsBadArg, + "Unsupported depth! Only CV_8U and CV_16U are supported for NORM_L1"); + } break; default: CV_Error(Error::StsBadArg, - "Unsupported image format! Only CV_8UC1, CV_8UC2 and CV_8UC3 are supported"); + "Unsupported norm type! 
Only NORM_L2 and NORM_L1 are supported"); } } @@ -92,7 +165,7 @@ void cv::fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst, Size src_size = _src.size(); if (type != CV_8UC3 && type != CV_8UC4) { - CV_Error(Error::StsBadArg, "Type of input image should be CV_8UC3!"); + CV_Error(Error::StsBadArg, "Type of input image should be CV_8UC3 or CV_8UC4!"); return; } @@ -108,8 +181,8 @@ void cv::fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst, Mat src_lab; cvtColor(src, src_lab, COLOR_LBGR2Lab); - Mat l(src_size, CV_8U); - Mat ab(src_size, CV_8UC2); + Mat l(src_size, CV_MAKE_TYPE(depth, 1)); + Mat ab(src_size, CV_MAKE_TYPE(depth, 2)); Mat l_ab[] = { l, ab }; int from_to[] = { 0,0, 1,1, 2,2 }; mixChannels(&src_lab, 1, l_ab, 2, from_to, 3); @@ -157,9 +230,76 @@ static void fastNlMeansDenoisingMultiCheckPreconditions( } } +template +static void fastNlMeansDenoisingMulti_( const std::vector& srcImgs, Mat& dst, + int imgToDenoiseIndex, int temporalWindowSize, + const std::vector& h, + int templateWindowSize, int searchWindowSize) +{ + int hn = (int)h.size(); + + switch (srcImgs[0].type()) + { + case CV_8U: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); + break; + case CV_8UC2: + if (hn == 1) + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, IT, UIT, D, int>( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, IT, UIT, D, Vec2i>( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); + break; + case CV_8UC3: + if (hn == 1) + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, IT, UIT, D, int>( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, 
templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, IT, UIT, D, Vec3i>( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); + break; + case CV_8UC4: + if (hn == 1) + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, IT, UIT, D, int>( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, IT, UIT, D, Vec4i>( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); + break; + default: + CV_Error(Error::StsBadArg, + "Unsupported image format! Only CV_8U, CV_8UC2, CV_8UC3 and CV_8UC4 are supported"); + } +} + void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _dst, int imgToDenoiseIndex, int temporalWindowSize, float h, int templateWindowSize, int searchWindowSize) +{ + fastNlMeansDenoisingMulti(_srcImgs, _dst, imgToDenoiseIndex, temporalWindowSize, + std::vector(1, h), templateWindowSize, searchWindowSize); +} + +void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _dst, + int imgToDenoiseIndex, int temporalWindowSize, + const std::vector& h, + int templateWindowSize, int searchWindowSize, int normType) { std::vector srcImgs; _srcImgs.getMatVector(srcImgs); @@ -168,32 +308,52 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds srcImgs, imgToDenoiseIndex, temporalWindowSize, templateWindowSize, searchWindowSize); + int hn = (int)h.size(); + int type = srcImgs[0].type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + CV_Assert(hn == 1 || hn == cn); + _dst.create(srcImgs[0].size(), srcImgs[0].type()); Mat dst = _dst.getMat(); - switch (srcImgs[0].type()) - { - case CV_8U: - parallel_for_(cv::Range(0, srcImgs[0].rows), - 
FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + switch (normType) { + case NORM_L2: + switch (depth) { + case CV_8U: + fastNlMeansDenoisingMulti_(srcImgs, dst, + imgToDenoiseIndex, temporalWindowSize, + h, + templateWindowSize, searchWindowSize); + break; + default: + CV_Error(Error::StsBadArg, + "Unsupported depth! Only CV_8U is supported for NORM_L2"); + } break; - case CV_8UC2: - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); - break; - case CV_8UC3: - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + case NORM_L1: + switch (depth) { + case CV_8U: + fastNlMeansDenoisingMulti_(srcImgs, dst, + imgToDenoiseIndex, temporalWindowSize, + h, + templateWindowSize, searchWindowSize); + break; + case CV_16U: + fastNlMeansDenoisingMulti_(srcImgs, dst, + imgToDenoiseIndex, temporalWindowSize, + h, + templateWindowSize, searchWindowSize); + break; + default: + CV_Error(Error::StsBadArg, + "Unsupported depth! Only CV_8U and CV_16U are supported for NORM_L1"); + } break; default: CV_Error(Error::StsBadArg, - "Unsupported matrix format! Only uchar, Vec2b, Vec3b are supported"); + "Unsupported norm type! 
Only NORM_L2 and NORM_L1 are supported"); } } @@ -212,9 +372,10 @@ void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputAr _dst.create(srcImgs[0].size(), srcImgs[0].type()); Mat dst = _dst.getMat(); + int type = srcImgs[0].type(), depth = CV_MAT_DEPTH(type); int src_imgs_size = static_cast(srcImgs.size()); - if (srcImgs[0].type() != CV_8UC3) + if (type != CV_8UC3) { CV_Error(Error::StsBadArg, "Type of input images should be CV_8UC3!"); return; @@ -228,9 +389,9 @@ void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputAr std::vector ab(src_imgs_size); for (int i = 0; i < src_imgs_size; i++) { - src_lab[i] = Mat::zeros(srcImgs[0].size(), CV_8UC3); - l[i] = Mat::zeros(srcImgs[0].size(), CV_8UC1); - ab[i] = Mat::zeros(srcImgs[0].size(), CV_8UC2); + src_lab[i] = Mat::zeros(srcImgs[0].size(), type); + l[i] = Mat::zeros(srcImgs[0].size(), CV_MAKE_TYPE(depth, 1)); + ab[i] = Mat::zeros(srcImgs[0].size(), CV_MAKE_TYPE(depth, 2)); cvtColor(srcImgs[i], src_lab[i], COLOR_LBGR2Lab); Mat l_ab[] = { l[i], ab[i] }; diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp index b8f5a0392..6e74acf03 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp @@ -50,13 +50,13 @@ using namespace cv; -template +template struct FastNlMeansDenoisingInvoker : public ParallelLoopBody { public: FastNlMeansDenoisingInvoker(const Mat& src, Mat& dst, - int template_window_size, int search_window_size, const float h); + int template_window_size, int search_window_size, const float *h); void operator() (const Range& range) const; @@ -75,9 +75,9 @@ private: int template_window_half_size_; int search_window_half_size_; - int fixed_point_mult_; + typename pixelInfo::sampleType fixed_point_mult_; int almost_template_window_size_sq_bin_shift_; - std::vector almost_dist2weight_; + std::vector almost_dist2weight_; void 
calcDistSumsForFirstElementInRow( int i, Array2d& dist_sums, @@ -99,15 +99,15 @@ inline int getNearestPowerOf2(int value) return p; } -template -FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( +template +FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( const Mat& src, Mat& dst, int template_window_size, int search_window_size, - const float h) : + const float *h) : src_(src), dst_(dst) { - CV_Assert(src.channels() == sizeof(T)); //T is Vec1b or Vec2b or Vec3b + CV_Assert(src.channels() == pixelInfo::channels); template_window_half_size_ = template_window_size / 2; search_window_half_size_ = search_window_size / 2; @@ -117,8 +117,10 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( border_size_ = search_window_half_size_ + template_window_half_size_; copyMakeBorder(src_, extended_src_, border_size_, border_size_, border_size_, border_size_, BORDER_DEFAULT); - const int max_estimate_sum_value = search_window_size_ * search_window_size_ * 255; - fixed_point_mult_ = std::numeric_limits::max() / max_estimate_sum_value; + const IT max_estimate_sum_value = + (IT)search_window_size_ * (IT)search_window_size_ * (IT)pixelInfo::sampleMax(); + fixed_point_mult_ = (int)std::min(std::numeric_limits::max() / max_estimate_sum_value, + pixelInfo::sampleMax()); // precalc weight for every possible l2 dist between blocks // additional optimization of precalced weights to replace division(averaging) by binary shift @@ -127,30 +129,24 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq); double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq; - int max_dist = 255 * 255 * sizeof(T); + int max_dist = D::template maxDist(); int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1); almost_dist2weight_.resize(almost_max_dist); - const double WEIGHT_THRESHOLD = 0.001; for 
(int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) { double dist = almost_dist * almost_dist2actual_dist_multiplier; - int weight = cvRound(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T)))); - - if (weight < WEIGHT_THRESHOLD * fixed_point_mult_) - weight = 0; - - almost_dist2weight_[almost_dist] = weight; + almost_dist2weight_[almost_dist] = + D::template calcWeight(dist, h, fixed_point_mult_); } - CV_Assert(almost_dist2weight_[0] == fixed_point_mult_); // additional optimization init end if (dst_.empty()) dst_ = Mat::zeros(src_.size(), src_.type()); } -template -void FastNlMeansDenoisingInvoker::operator() (const Range& range) const +template +void FastNlMeansDenoisingInvoker::operator() (const Range& range) const { int row_from = range.start; int row_to = range.end - 1; @@ -215,7 +211,7 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) const dist_sums_row[x] -= col_dist_sums_row[x]; int bx = start_bx + x; - col_dist_sums_row[x] = up_col_dist_sums_row[x] + calcUpDownDist(a_up, a_down, b_up_ptr[bx], b_down_ptr[bx]); + col_dist_sums_row[x] = up_col_dist_sums_row[x] + D::template calcUpDownDist(a_up, a_down, b_up_ptr[bx], b_down_ptr[bx]); dist_sums_row[x] += col_dist_sums_row[x]; up_col_dist_sums_row[x] = col_dist_sums_row[x]; @@ -227,9 +223,11 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) const } // calc weights - int estimation[3], weights_sum = 0; - for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++) + IT estimation[pixelInfo::channels], weights_sum[pixelInfo::channels]; + for (size_t channel_num = 0; channel_num < pixelInfo::channels; channel_num++) estimation[channel_num] = 0; + for (size_t channel_num = 0; channel_num < pixelInfo::channels; channel_num++) + weights_sum[channel_num] = 0; for (int y = 0; y < search_window_size_; y++) { @@ -238,24 +236,21 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) const for (int x = 0; x < search_window_size_; x++) { int 
almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_; - int weight = almost_dist2weight_[almostAvgDist]; - weights_sum += weight; - + WT weight = almost_dist2weight_[almostAvgDist]; T p = cur_row_ptr[border_size_ + search_window_x + x]; - incWithWeight(estimation, weight, p); + incWithWeight(estimation, weights_sum, weight, p); } } - for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++) - estimation[channel_num] = ((unsigned)estimation[channel_num] + weights_sum/2) / weights_sum; - - dst_.at(i,j) = saturateCastFromArray(estimation); + divByWeightsSum::channels, pixelInfo::channels>(estimation, + weights_sum); + dst_.at(i,j) = saturateCastFromArray(estimation); } } } -template -inline void FastNlMeansDenoisingInvoker::calcDistSumsForFirstElementInRow( +template +inline void FastNlMeansDenoisingInvoker::calcDistSumsForFirstElementInRow( int i, Array2d& dist_sums, Array3d& col_dist_sums, @@ -276,7 +271,7 @@ inline void FastNlMeansDenoisingInvoker::calcDistSumsForFirstElementInRow( for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) for (int tx = -template_window_half_size_; tx <= template_window_half_size_; tx++) { - int dist = calcDist(extended_src_, + int dist = D::template calcDist(extended_src_, border_size_ + i + ty, border_size_ + j + tx, border_size_ + start_y + ty, border_size_ + start_x + tx); @@ -288,8 +283,8 @@ inline void FastNlMeansDenoisingInvoker::calcDistSumsForFirstElementInRow( } } -template -inline void FastNlMeansDenoisingInvoker::calcDistSumsForElementInFirstRow( +template +inline void FastNlMeansDenoisingInvoker::calcDistSumsForElementInFirstRow( int i, int j, int first_col_num, Array2d& dist_sums, Array3d& col_dist_sums, @@ -312,7 +307,7 @@ inline void FastNlMeansDenoisingInvoker::calcDistSumsForElementInFirstRow( int by = start_by + y; int bx = start_bx + x; for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) - 
col_dist_sums[new_last_col_num][y][x] += calcDist(extended_src_, ay + ty, ax, by + ty, bx); + col_dist_sums[new_last_col_num][y][x] += D::template calcDist(extended_src_, ay + ty, ax, by + ty, bx); dist_sums[y][x] += col_dist_sums[new_last_col_num][y][x]; up_col_dist_sums[j][y][x] = col_dist_sums[new_last_col_num][y][x]; diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp index ab7db5d2d..8f31e8b02 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp @@ -44,118 +44,438 @@ using namespace cv; -template static inline int calcDist(const T a, const T b); - -template <> inline int calcDist(const uchar a, const uchar b) +template struct pixelInfo_ { - return (a-b) * (a-b); + static const int channels = 1; + typedef T sampleType; +}; + +template struct pixelInfo_ > +{ + static const int channels = n; + typedef ET sampleType; +}; + +template struct pixelInfo: public pixelInfo_ +{ + using typename pixelInfo_::sampleType; + + static inline sampleType sampleMax() + { + return std::numeric_limits::max(); + } + + static inline sampleType sampleMin() + { + return std::numeric_limits::min(); + } + + static inline size_t sampleBytes() + { + return sizeof(sampleType); + } + + static inline size_t sampleBits() + { + return 8*sampleBytes(); + } +}; + +class DistAbs +{ + template struct calcDist_ + { + static inline int f(const T a, const T b) + { + return std::abs((int)(a-b)); + } + }; + + template struct calcDist_ > + { + static inline int f(const Vec a, const Vec b) + { + return std::abs((int)(a[0]-b[0])) + std::abs((int)(a[1]-b[1])); + } + }; + + template struct calcDist_ > + { + static inline int f(const Vec a, const Vec b) + { + return + std::abs((int)(a[0]-b[0])) + + std::abs((int)(a[1]-b[1])) + + std::abs((int)(a[2]-b[2])); + } + }; + + template struct calcDist_ > + { + static inline int f(const Vec a, 
const Vec b) + { + return + std::abs((int)(a[0]-b[0])) + + std::abs((int)(a[1]-b[1])) + + std::abs((int)(a[2]-b[2])) + + std::abs((int)(a[3]-b[3])); + } + }; + + template struct calcWeight_ + { + static inline WT f(double dist, const float *h, WT fixed_point_mult) + { + double w = std::exp(-dist*dist / (h[0]*h[0] * pixelInfo::channels)); + if (std::isnan(w)) w = 1.0; // Handle h = 0.0 + + static const double WEIGHT_THRESHOLD = 0.001; + WT weight = (WT)round(fixed_point_mult * w); + if (weight < WEIGHT_THRESHOLD * fixed_point_mult) weight = 0; + + return weight; + } + }; + + template struct calcWeight_ > + { + static inline Vec f(double dist, const float *h, ET fixed_point_mult) + { + Vec res; + for (int i=0; i(dist, &h[i], fixed_point_mult); + return res; + } + }; + +public: + template static inline int calcDist(const T a, const T b) + { + return calcDist_::f(a, b); + } + + template + static inline int calcDist(const Mat& m, int i1, int j1, int i2, int j2) + { + const T a = m.at(i1, j1); + const T b = m.at(i2, j2); + return calcDist(a,b); + } + + template + static inline int calcUpDownDist(T a_up, T a_down, T b_up, T b_down) + { + return calcDist(a_down, b_down) - calcDist(a_up, b_up); + }; + + template + static inline WT calcWeight(double dist, const float *h, + typename pixelInfo::sampleType fixed_point_mult) + { + return calcWeight_::f(dist, h, fixed_point_mult); + } + + template + static inline int maxDist() + { + return (int)pixelInfo::sampleMax() * pixelInfo::channels; + } +}; + +class DistSquared +{ + template struct calcDist_ + { + static inline int f(const T a, const T b) + { + return (int)(a-b) * (int)(a-b); + } + }; + + template struct calcDist_ > + { + static inline int f(const Vec a, const Vec b) + { + return (int)(a[0]-b[0])*(int)(a[0]-b[0]) + (int)(a[1]-b[1])*(int)(a[1]-b[1]); + } + }; + + template struct calcDist_ > + { + static inline int f(const Vec a, const Vec b) + { + return + (int)(a[0]-b[0])*(int)(a[0]-b[0]) + + 
(int)(a[1]-b[1])*(int)(a[1]-b[1]) + + (int)(a[2]-b[2])*(int)(a[2]-b[2]); + } + }; + + template struct calcDist_ > + { + static inline int f(const Vec a, const Vec b) + { + return + (int)(a[0]-b[0])*(int)(a[0]-b[0]) + + (int)(a[1]-b[1])*(int)(a[1]-b[1]) + + (int)(a[2]-b[2])*(int)(a[2]-b[2]) + + (int)(a[3]-b[3])*(int)(a[3]-b[3]); + } + }; + + template struct calcUpDownDist_ + { + static inline int f(T a_up, T a_down, T b_up, T b_down) + { + int A = a_down - b_down; + int B = a_up - b_up; + return (A-B)*(A+B); + } + }; + + template struct calcUpDownDist_ > + { + private: + typedef Vec T; + public: + static inline int f(T a_up, T a_down, T b_up, T b_down) + { + return calcDist(a_down, b_down) - calcDist(a_up, b_up); + } + }; + + template struct calcWeight_ + { + static inline WT f(double dist, const float *h, WT fixed_point_mult) + { + double w = std::exp(-dist / (h[0]*h[0] * pixelInfo::channels)); + if (std::isnan(w)) w = 1.0; // Handle h = 0.0 + + static const double WEIGHT_THRESHOLD = 0.001; + WT weight = (WT)round(fixed_point_mult * w); + if (weight < WEIGHT_THRESHOLD * fixed_point_mult) weight = 0; + + return weight; + } + }; + + template struct calcWeight_ > + { + static inline Vec f(double dist, const float *h, ET fixed_point_mult) + { + Vec res; + for (int i=0; i(dist, &h[i], fixed_point_mult); + return res; + } + }; + +public: + template static inline int calcDist(const T a, const T b) + { + return calcDist_::f(a, b); + } + + template + static inline int calcDist(const Mat& m, int i1, int j1, int i2, int j2) + { + const T a = m.at(i1, j1); + const T b = m.at(i2, j2); + return calcDist(a,b); + } + + template + static inline int calcUpDownDist(T a_up, T a_down, T b_up, T b_down) + { + return calcUpDownDist_::f(a_up, a_down, b_up, b_down); + }; + + template + static inline WT calcWeight(double dist, const float *h, + typename pixelInfo::sampleType fixed_point_mult) + { + return calcWeight_::f(dist, h, fixed_point_mult); + } + + template + static inline int 
maxDist() + { + return (int)pixelInfo::sampleMax() * (int)pixelInfo::sampleMax() * + pixelInfo::channels; + } +}; + +template struct incWithWeight_ +{ + static inline void f(IT* estimation, IT* weights_sum, WT weight, T p) + { + estimation[0] += (IT)weight * p; + weights_sum[0] += (IT)weight; + } +}; + +template struct incWithWeight_, IT, WT> +{ + static inline void f(IT* estimation, IT* weights_sum, WT weight, Vec p) + { + estimation[0] += (IT)weight * p[0]; + estimation[1] += (IT)weight * p[1]; + weights_sum[0] += (IT)weight; + } +}; + +template struct incWithWeight_, IT, WT> +{ + static inline void f(IT* estimation, IT* weights_sum, WT weight, Vec p) + { + estimation[0] += (IT)weight * p[0]; + estimation[1] += (IT)weight * p[1]; + estimation[2] += (IT)weight * p[2]; + weights_sum[0] += (IT)weight; + } +}; + +template struct incWithWeight_, IT, WT> +{ + static inline void f(IT* estimation, IT* weights_sum, WT weight, Vec p) + { + estimation[0] += (IT)weight * p[0]; + estimation[1] += (IT)weight * p[1]; + estimation[2] += (IT)weight * p[2]; + estimation[3] += (IT)weight * p[3]; + weights_sum[0] += (IT)weight; + } +}; + +template struct incWithWeight_, IT, Vec > +{ + static inline void f(IT* estimation, IT* weights_sum, Vec weight, Vec p) + { + estimation[0] += (IT)weight[0] * p[0]; + estimation[1] += (IT)weight[1] * p[1]; + weights_sum[0] += (IT)weight[0]; + weights_sum[1] += (IT)weight[1]; + } +}; + +template struct incWithWeight_, IT, Vec > +{ + static inline void f(IT* estimation, IT* weights_sum, Vec weight, Vec p) + { + estimation[0] += (IT)weight[0] * p[0]; + estimation[1] += (IT)weight[1] * p[1]; + estimation[2] += (IT)weight[2] * p[2]; + weights_sum[0] += (IT)weight[0]; + weights_sum[1] += (IT)weight[1]; + weights_sum[2] += (IT)weight[2]; + } +}; + +template struct incWithWeight_, IT, Vec > +{ + static inline void f(IT* estimation, IT* weights_sum, Vec weight, Vec p) + { + estimation[0] += (IT)weight[0] * p[0]; + estimation[1] += (IT)weight[1] * p[1]; + 
estimation[2] += (IT)weight[2] * p[2]; + estimation[3] += (IT)weight[3] * p[3]; + weights_sum[0] += (IT)weight[0]; + weights_sum[1] += (IT)weight[1]; + weights_sum[2] += (IT)weight[2]; + weights_sum[3] += (IT)weight[3]; + } +}; + +template +static inline void incWithWeight(IT* estimation, IT* weights_sum, WT weight, T p) +{ + return incWithWeight_::f(estimation, weights_sum, weight, p); } -template <> inline int calcDist(const Vec2b a, const Vec2b b) +template struct divByWeightsSum_ { - return (a[0]-b[0])*(a[0]-b[0]) + (a[1]-b[1])*(a[1]-b[1]); + static inline void f(IT* estimation, IT* weights_sum); +}; + +template struct divByWeightsSum_ +{ + static inline void f(IT* estimation, IT* weights_sum) + { + estimation[0] = (static_cast(estimation[0]) + weights_sum[0]/2) / weights_sum[0]; + } +}; + +template struct divByWeightsSum_ +{ + static inline void f(IT* estimation, IT* weights_sum) + { + for (size_t i = 0; i < n; i++) + estimation[i] = (static_cast(estimation[i]) + weights_sum[0]/2) / weights_sum[0]; + } +}; + +template struct divByWeightsSum_ +{ + static inline void f(IT* estimation, IT* weights_sum) + { + for (size_t i = 0; i < n; i++) + estimation[i] = (static_cast(estimation[i]) + weights_sum[i]/2) / weights_sum[i]; + } +}; + +template +static inline void divByWeightsSum(IT* estimation, IT* weights_sum) +{ + return divByWeightsSum_::f(estimation, weights_sum); } -template <> inline int calcDist(const Vec3b a, const Vec3b b) +template struct saturateCastFromArray_ { - return (a[0]-b[0])*(a[0]-b[0]) + (a[1]-b[1])*(a[1]-b[1]) + (a[2]-b[2])*(a[2]-b[2]); -} + static inline T f(IT* estimation) + { + return saturate_cast(estimation[0]); + } +}; -template static inline int calcDist(const Mat& m, int i1, int j1, int i2, int j2) +template struct saturateCastFromArray_, IT> { - const T a = m.at(i1, j1); - const T b = m.at(i2, j2); - return calcDist(a,b); -} + static inline Vec f(IT* estimation) + { + Vec res; + res[0] = saturate_cast(estimation[0]); + res[1] = 
saturate_cast(estimation[1]); + return res; + } +}; -template static inline int calcUpDownDist(T a_up, T a_down, T b_up, T b_down) +template struct saturateCastFromArray_, IT> { - return calcDist(a_down, b_down) - calcDist(a_up, b_up); -} + static inline Vec f(IT* estimation) + { + Vec res; + res[0] = saturate_cast(estimation[0]); + res[1] = saturate_cast(estimation[1]); + res[2] = saturate_cast(estimation[2]); + return res; + } +}; -template <> inline int calcUpDownDist(uchar a_up, uchar a_down, uchar b_up, uchar b_down) +template struct saturateCastFromArray_, IT> { - int A = a_down - b_down; - int B = a_up - b_up; - return (A-B)*(A+B); -} + static inline Vec f(IT* estimation) + { + Vec res; + res[0] = saturate_cast(estimation[0]); + res[1] = saturate_cast(estimation[1]); + res[2] = saturate_cast(estimation[2]); + res[3] = saturate_cast(estimation[3]); + return res; + } +}; -template static inline void incWithWeight(int* estimation, int weight, T p); - -template <> inline void incWithWeight(int* estimation, int weight, uchar p) +template static inline T saturateCastFromArray(IT* estimation) { - estimation[0] += weight * p; -} - -template <> inline void incWithWeight(int* estimation, int weight, Vec2b p) -{ - estimation[0] += weight * p[0]; - estimation[1] += weight * p[1]; -} - -template <> inline void incWithWeight(int* estimation, int weight, Vec3b p) -{ - estimation[0] += weight * p[0]; - estimation[1] += weight * p[1]; - estimation[2] += weight * p[2]; -} - -template <> inline void incWithWeight(int* estimation, int weight, int p) -{ - estimation[0] += weight * p; -} - -template <> inline void incWithWeight(int* estimation, int weight, Vec2i p) -{ - estimation[0] += weight * p[0]; - estimation[1] += weight * p[1]; -} - -template <> inline void incWithWeight(int* estimation, int weight, Vec3i p) -{ - estimation[0] += weight * p[0]; - estimation[1] += weight * p[1]; - estimation[2] += weight * p[2]; -} - -template static inline T saturateCastFromArray(int* 
estimation); - -template <> inline uchar saturateCastFromArray(int* estimation) -{ - return saturate_cast(estimation[0]); -} - -template <> inline Vec2b saturateCastFromArray(int* estimation) -{ - Vec2b res; - res[0] = saturate_cast(estimation[0]); - res[1] = saturate_cast(estimation[1]); - return res; -} - -template <> inline Vec3b saturateCastFromArray(int* estimation) -{ - Vec3b res; - res[0] = saturate_cast(estimation[0]); - res[1] = saturate_cast(estimation[1]); - res[2] = saturate_cast(estimation[2]); - return res; -} - -template <> inline int saturateCastFromArray(int* estimation) -{ - return estimation[0]; -} - -template <> inline Vec2i saturateCastFromArray(int* estimation) -{ - estimation[1] = 0; - return Vec2i(estimation); -} - -template <> inline Vec3i saturateCastFromArray(int* estimation) -{ - return Vec3i(estimation); + return saturateCastFromArray_::f(estimation); } #endif diff --git a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp index 1cdd8fa49..1c511f37b 100644 --- a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp @@ -28,12 +28,16 @@ static int divUp(int a, int b) return (a + b - 1) / b; } -template -static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindowSize, int templateWindowSize, FT h, int cn, +template +static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, + int searchWindowSize, int templateWindowSize, + const FT *h, int hn, int cn, int normType, int & almostTemplateWindowSizeSqBinShift) { - const int maxEstimateSumValue = searchWindowSize * searchWindowSize * 255; - int fixedPointMult = std::numeric_limits::max() / maxEstimateSumValue; + const WT maxEstimateSumValue = searchWindowSize * searchWindowSize * + std::numeric_limits::max(); + int fixedPointMult = (int)std::min(std::numeric_limits::max() / maxEstimateSumValue, + std::numeric_limits::max()); int depth = DataType::depth; bool 
doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; @@ -48,33 +52,44 @@ static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindow FT almostDist2ActualDistMultiplier = (FT)(1 << almostTemplateWindowSizeSqBinShift) / templateWindowSizeSq; const FT WEIGHT_THRESHOLD = 1e-3f; - int maxDist = 255 * 255 * cn; + int maxDist = normType == NORM_L1 ? std::numeric_limits::max() * cn : + std::numeric_limits::max() * std::numeric_limits::max() * cn; int almostMaxDist = (int)(maxDist / almostDist2ActualDistMultiplier + 1); - FT den = 1.0f / (h * h * cn); + FT den[4]; + CV_Assert(hn > 0 && hn <= 4); + for (int i=0; i 4 || ((normType != NORM_L2 || depth != CV_8U) && + (normType != NORM_L1 || (depth != CV_8U && depth != CV_16U)))) return false; int templateWindowHalfWize = templateWindowSize / 2; @@ -84,33 +99,68 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, int nblocksx = divUp(size.width, BLOCK_COLS), nblocksy = divUp(size.height, BLOCK_ROWS); int almostTemplateWindowSizeSqBinShift = -1; - char cvt[2][40]; + char buf[4][40]; String opts = format("-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d" - " -D uchar_t=%s -D int_t=%s -D BLOCK_COLS=%d -D BLOCK_ROWS=%d" + " -D pixel_t=%s -D int_t=%s -D wlut_t=%s" + " -D weight_t=%s -D convert_weight_t=%s -D sum_t=%s -D convert_sum_t=%s" + " -D BLOCK_COLS=%d -D BLOCK_ROWS=%d" " -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d" - " -D convert_int_t=%s -D cn=%d -D convert_uchar_t=%s", - templateWindowSize, searchWindowSize, ocl::typeToStr(type), - ocl::typeToStr(CV_32SC(cn)), BLOCK_COLS, BLOCK_ROWS, ctaSize, - templateWindowHalfWize, searchWindowHalfSize, - ocl::convertTypeStr(CV_8U, CV_32S, cn, cvt[0]), cn, - ocl::convertTypeStr(CV_32S, CV_8U, cn, cvt[1])); + " -D convert_int_t=%s -D cn=%d -D psz=%d -D convert_pixel_t=%s%s", + templateWindowSize, searchWindowSize, + ocl::typeToStr(type), ocl::typeToStr(CV_32SC(cn)), + ocl::typeToStr(CV_32SC(hn)), + depth 
== CV_8U ? ocl::typeToStr(CV_32SC(hn)) : + format("long%s", hn > 1 ? format("%d", hn).c_str() : "").c_str(), + depth == CV_8U ? ocl::convertTypeStr(CV_32S, CV_32S, hn, buf[0]) : + format("convert_long%s", hn > 1 ? format("%d", hn).c_str() : "").c_str(), + depth == CV_8U ? ocl::typeToStr(CV_32SC(cn)) : + format("long%s", cn > 1 ? format("%d", cn).c_str() : "").c_str(), + depth == CV_8U ? ocl::convertTypeStr(depth, CV_32S, cn, buf[1]) : + format("convert_long%s", cn > 1 ? format("%d", cn).c_str() : "").c_str(), + BLOCK_COLS, BLOCK_ROWS, + ctaSize, templateWindowHalfWize, searchWindowHalfSize, + ocl::convertTypeStr(depth, CV_32S, cn, buf[2]), cn, + (depth == CV_8U ? sizeof(uchar) : sizeof(ushort)) * (cn == 3 ? 4 : cn), + ocl::convertTypeStr(CV_32S, depth, cn, buf[3]), + normType == NORM_L1 ? " -D ABS" : ""); ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts); if (k.empty()) return false; UMat almostDist2Weight; - if (!ocl_calcAlmostDist2Weight(almostDist2Weight, searchWindowSize, templateWindowSize, h, cn, - almostTemplateWindowSizeSqBinShift)) + if ((depth == CV_8U && + !ocl_calcAlmostDist2Weight(almostDist2Weight, + searchWindowSize, templateWindowSize, + h, hn, cn, normType, + almostTemplateWindowSizeSqBinShift)) || + (depth == CV_16U && + !ocl_calcAlmostDist2Weight(almostDist2Weight, + searchWindowSize, templateWindowSize, + h, hn, cn, normType, + almostTemplateWindowSizeSqBinShift))) return false; CV_Assert(almostTemplateWindowSizeSqBinShift >= 0); UMat srcex; int borderSize = searchWindowHalfSize + templateWindowHalfWize; - copyMakeBorder(_src, srcex, borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT); + if (cn == 3) { + srcex.create(size.height + 2*borderSize, size.width + 2*borderSize, CV_MAKE_TYPE(depth, 4)); + UMat src(srcex, Rect(borderSize, borderSize, size.width, size.height)); + int from_to[] = { 0,0, 1,1, 2,2 }; + mixChannels(std::vector(1, _src.getUMat()), std::vector(1, src), from_to, 3); + copyMakeBorder(src, srcex, 
borderSize, borderSize, borderSize, borderSize, + BORDER_DEFAULT|BORDER_ISOLATED); // create borders in place + } + else + copyMakeBorder(_src, srcex, borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT); _dst.create(size, type); - UMat dst = _dst.getUMat(); + UMat dst; + if (cn == 3) + dst.create(size, CV_MAKE_TYPE(depth, 4)); + else + dst = _dst.getUMat(); int searchWindowSizeSq = searchWindowSize * searchWindowSize; Size upColSumSize(size.width, searchWindowSizeSq * nblocksy); @@ -123,7 +173,14 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, ocl::KernelArg::PtrReadOnly(buffer), almostTemplateWindowSizeSqBinShift); size_t globalsize[2] = { nblocksx * ctaSize, nblocksy }, localsize[2] = { ctaSize, 1 }; - return k.run(2, globalsize, localsize, false); + if (!k.run(2, globalsize, localsize, false)) return false; + + if (cn == 3) { + int from_to[] = { 0,0, 1,1, 2,2 }; + mixChannels(std::vector(1, dst), std::vector(1, _dst.getUMat()), from_to, 3); + } + + return true; } static bool ocl_fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst, diff --git a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp index 191a67127..3f13f400d 100644 --- a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp @@ -50,14 +50,14 @@ using namespace cv; -template +template struct FastNlMeansMultiDenoisingInvoker : ParallelLoopBody { public: FastNlMeansMultiDenoisingInvoker(const std::vector& srcImgs, int imgToDenoiseIndex, int temporalWindowSize, Mat& dst, int template_window_size, - int search_window_size, const float h); + int search_window_size, const float *h); void operator() (const Range& range) const; @@ -81,9 +81,9 @@ private: int search_window_half_size_; int temporal_window_half_size_; - int fixed_point_mult_; + typename pixelInfo::sampleType fixed_point_mult_; int 
almost_template_window_size_sq_bin_shift; - std::vector almost_dist2weight; + std::vector almost_dist2weight; void calcDistSumsForFirstElementInRow(int i, Array3d& dist_sums, Array4d& col_dist_sums, @@ -94,19 +94,19 @@ private: Array4d& up_col_dist_sums) const; }; -template -FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( +template +FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( const std::vector& srcImgs, int imgToDenoiseIndex, int temporalWindowSize, cv::Mat& dst, int template_window_size, int search_window_size, - const float h) : + const float *h) : dst_(dst), extended_srcs_(srcImgs.size()) { CV_Assert(srcImgs.size() > 0); - CV_Assert(srcImgs[0].channels() == sizeof(T)); + CV_Assert(srcImgs[0].channels() == pixelInfo::channels); rows_ = srcImgs[0].rows; cols_ = srcImgs[0].cols; @@ -125,8 +125,10 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( border_size_, border_size_, border_size_, border_size_, cv::BORDER_DEFAULT); main_extended_src_ = extended_srcs_[temporal_window_half_size_]; - const int max_estimate_sum_value = temporal_window_size_ * search_window_size_ * search_window_size_ * 255; - fixed_point_mult_ = std::numeric_limits::max() / max_estimate_sum_value; + const IT max_estimate_sum_value = + (IT)temporal_window_size_ * (IT)search_window_size_ * (IT)search_window_size_ * (IT)pixelInfo::sampleMax(); + fixed_point_mult_ = (int)std::min(std::numeric_limits::max() / max_estimate_sum_value, + pixelInfo::sampleMax()); // precalc weight for every possible l2 dist between blocks // additional optimization of precalced weights to replace division(averaging) by binary shift @@ -138,30 +140,24 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift; double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq; - int max_dist = 255 * 255 * sizeof(T); - int 
almost_max_dist = (int) (max_dist / almost_dist2actual_dist_multiplier + 1); + int max_dist = D::template maxDist(); + int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1); almost_dist2weight.resize(almost_max_dist); - const double WEIGHT_THRESHOLD = 0.001; for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) { double dist = almost_dist * almost_dist2actual_dist_multiplier; - int weight = cvRound(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T)))); - - if (weight < WEIGHT_THRESHOLD * fixed_point_mult_) - weight = 0; - - almost_dist2weight[almost_dist] = weight; + almost_dist2weight[almost_dist] = + D::template calcWeight(dist, h, fixed_point_mult_); } - CV_Assert(almost_dist2weight[0] == fixed_point_mult_); // additional optimization init end if (dst_.empty()) dst_ = Mat::zeros(srcImgs[0].size(), srcImgs[0].type()); } -template -void FastNlMeansMultiDenoisingInvoker::operator() (const Range& range) const +template +void FastNlMeansMultiDenoisingInvoker::operator() (const Range& range) const { int row_from = range.start; int row_to = range.end - 1; @@ -234,7 +230,7 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& range) const dist_sums_row[x] -= col_dist_sums_row[x]; col_dist_sums_row[x] = up_col_dist_sums_row[x] + - calcUpDownDist(a_up, a_down, b_up_ptr[start_bx + x], b_down_ptr[start_bx + x]); + D::template calcUpDownDist(a_up, a_down, b_up_ptr[start_bx + x], b_down_ptr[start_bx + x]); dist_sums_row[x] += col_dist_sums_row[x]; up_col_dist_sums_row[x] = col_dist_sums_row[x]; @@ -247,11 +243,11 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& range) const } // calc weights - int weights_sum = 0; - - int estimation[3]; - for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++) + IT estimation[pixelInfo::channels], weights_sum[pixelInfo::channels]; + for (size_t channel_num = 0; channel_num < pixelInfo::channels; channel_num++) estimation[channel_num] = 0; + for (size_t 
channel_num = 0; channel_num < pixelInfo::channels; channel_num++) + weights_sum[channel_num] = 0; for (int d = 0; d < temporal_window_size_; d++) { @@ -266,26 +262,22 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& range) const { int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift; - int weight = almost_dist2weight[almostAvgDist]; - weights_sum += weight; - + WT weight = almost_dist2weight[almostAvgDist]; T p = cur_row_ptr[border_size_ + search_window_x + x]; - incWithWeight(estimation, weight, p); + incWithWeight(estimation, weights_sum, weight, p); } } } - for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++) - estimation[channel_num] = ((unsigned)estimation[channel_num] + weights_sum / 2) / weights_sum; - - dst_.at(i,j) = saturateCastFromArray(estimation); - + divByWeightsSum::channels, pixelInfo::channels>(estimation, + weights_sum); + dst_.at(i,j) = saturateCastFromArray(estimation); } } } -template -inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForFirstElementInRow( +template +inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForFirstElementInRow( int i, Array3d& dist_sums, Array4d& col_dist_sums, Array4d& up_col_dist_sums) const { int j = 0; @@ -310,7 +302,7 @@ inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForFirstElementInRo { for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) { - int dist = calcDist( + int dist = D::template calcDist( main_extended_src_.at(border_size_ + i + ty, border_size_ + j + tx), cur_extended_src.at(border_size_ + start_y + ty, border_size_ + start_x + tx)); @@ -325,8 +317,8 @@ inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForFirstElementInRo } } -template -inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForElementInFirstRow( +template +inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForElementInFirstRow( int i, int j, int first_col_num, Array3d& dist_sums, Array4d& 
col_dist_sums, Array4d& up_col_dist_sums) const { @@ -353,7 +345,7 @@ inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForElementInFirstRo int* col_dist_sums_ptr = &col_dist_sums[new_last_col_num][d][y][x]; for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) { - *col_dist_sums_ptr += calcDist( + *col_dist_sums_ptr += D::template calcDist( main_extended_src_.at(ay + ty, ax), cur_extended_src.at(by + ty, bx)); } diff --git a/modules/photo/src/opencl/nlmeans.cl b/modules/photo/src/opencl/nlmeans.cl index af3fb1f9b..879665f48 100644 --- a/modules/photo/src/opencl/nlmeans.cl +++ b/modules/photo/src/opencl/nlmeans.cl @@ -20,21 +20,23 @@ #ifdef OP_CALC_WEIGHTS -__kernel void calcAlmostDist2Weight(__global int * almostDist2Weight, int almostMaxDist, +__kernel void calcAlmostDist2Weight(__global wlut_t * almostDist2Weight, int almostMaxDist, FT almostDist2ActualDistMultiplier, int fixedPointMult, - FT den, FT WEIGHT_THRESHOLD) + w_t den, FT WEIGHT_THRESHOLD) { int almostDist = get_global_id(0); if (almostDist < almostMaxDist) { FT dist = almostDist * almostDist2ActualDistMultiplier; - int weight = convert_int_sat_rte(fixedPointMult * exp(-dist * den)); - - if (weight < WEIGHT_THRESHOLD * fixedPointMult) - weight = 0; - - almostDist2Weight[almostDist] = weight; +#ifdef ABS + w_t w = exp((w_t)(-dist*dist) * den); +#else + w_t w = exp((w_t)(-dist) * den); +#endif + wlut_t weight = convert_wlut_t(fixedPointMult * (isnan(w) ? (w_t)1.0 : w)); + almostDist2Weight[almostDist] = + weight < (wlut_t)(WEIGHT_THRESHOLD * fixedPointMult) ? 
(wlut_t)0 : weight; } } @@ -44,21 +46,35 @@ __kernel void calcAlmostDist2Weight(__global int * almostDist2Weight, int almost #define SEARCH_SIZE_SQ (SEARCH_SIZE * SEARCH_SIZE) -inline int calcDist(uchar_t a, uchar_t b) +inline int calcDist(pixel_t a, pixel_t b) { +#ifdef ABS + int_t retval = convert_int_t(abs_diff(a, b)); +#else int_t diff = convert_int_t(a) - convert_int_t(b); int_t retval = diff * diff; +#endif #if cn == 1 return retval; #elif cn == 2 return retval.x + retval.y; +#elif cn == 3 + return retval.x + retval.y + retval.z; +#elif cn == 4 + return retval.x + retval.y + retval.z + retval.w; #else -#error "cn should be either 1 or 2" +#error "cn should be either 1, 2, 3 or 4" #endif } -inline int calcDistUpDown(uchar_t down_value, uchar_t down_value_t, uchar_t up_value, uchar_t up_value_t) +#ifdef ABS +inline int calcDistUpDown(pixel_t down_value, pixel_t down_value_t, pixel_t up_value, pixel_t up_value_t) +{ + return calcDist(down_value, down_value_t) - calcDist(up_value, up_value_t); +} +#else +inline int calcDistUpDown(pixel_t down_value, pixel_t down_value_t, pixel_t up_value, pixel_t up_value_t) { int_t A = convert_int_t(down_value) - convert_int_t(down_value_t); int_t B = convert_int_t(up_value) - convert_int_t(up_value_t); @@ -68,10 +84,15 @@ inline int calcDistUpDown(uchar_t down_value, uchar_t down_value_t, uchar_t up_v return retval; #elif cn == 2 return retval.x + retval.y; +#elif cn == 3 + return retval.x + retval.y + retval.z; +#elif cn == 4 + return retval.x + retval.y + retval.z + retval.w; #else -#error "cn should be either 1 or 2" +#error "cn should be either 1, 2, 3 or 4" #endif } +#endif #define COND if (x == 0 && y == 0) @@ -87,9 +108,9 @@ inline void calcFirstElementInRow(__global const uchar * src, int src_step, int { int dist = 0, value; - __global const uchar_t * src_template = (__global const uchar_t *)(src + - mad24(sy + i / SEARCH_SIZE, src_step, mad24(cn, sx + i % SEARCH_SIZE, src_offset))); - __global const uchar_t * 
src_current = (__global const uchar_t *)(src + mad24(y, src_step, mad24(cn, x, src_offset))); + __global const pixel_t * src_template = (__global const pixel_t *)(src + + mad24(sy + i / SEARCH_SIZE, src_step, mad24(psz, sx + i % SEARCH_SIZE, src_offset))); + __global const pixel_t * src_current = (__global const pixel_t *)(src + mad24(y, src_step, mad24(psz, x, src_offset))); __global int * col_dists_current = col_dists + i * TEMPLATE_SIZE; #pragma unroll @@ -107,8 +128,8 @@ inline void calcFirstElementInRow(__global const uchar * src, int src_step, int dist += value; } - src_current = (__global const uchar_t *)((__global const uchar *)src_current + src_step); - src_template = (__global const uchar_t *)((__global const uchar *)src_template + src_step); + src_current = (__global const pixel_t *)((__global const uchar *)src_current + src_step); + src_template = (__global const pixel_t *)((__global const uchar *)src_template + src_step); } #pragma unroll @@ -130,9 +151,9 @@ inline void calcElementInFirstRow(__global const uchar * src, int src_step, int for (int i = id; i < SEARCH_SIZE_SQ; i += CTA_SIZE) { - __global const uchar_t * src_current = (__global const uchar_t *)(src + mad24(y, src_step, mad24(cn, x, src_offset))); - __global const uchar_t * src_template = (__global const uchar_t *)(src + - mad24(sy + i / SEARCH_SIZE, src_step, mad24(cn, sx + i % SEARCH_SIZE, src_offset))); + __global const pixel_t * src_current = (__global const pixel_t *)(src + mad24(y, src_step, mad24(psz, x, src_offset))); + __global const pixel_t * src_template = (__global const pixel_t *)(src + + mad24(sy + i / SEARCH_SIZE, src_step, mad24(psz, sx + i % SEARCH_SIZE, src_offset))); __global int * col_dists_current = col_dists + TEMPLATE_SIZE * i; int col_dist = 0; @@ -142,8 +163,8 @@ inline void calcElementInFirstRow(__global const uchar * src, int src_step, int { col_dist += calcDist(src_current[0], src_template[0]); - src_current = (__global const uchar_t *)((__global const uchar 
*)src_current + src_step); - src_template = (__global const uchar_t *)((__global const uchar *)src_template + src_step); + src_current = (__global const pixel_t *)((__global const uchar *)src_current + src_step); + src_template = (__global const pixel_t *)((__global const uchar *)src_template + src_step); } dists[i] += col_dist - col_dists_current[first]; @@ -160,8 +181,8 @@ inline void calcElement(__global const uchar * src, int src_step, int src_offset int sy_up = y - TEMPLATE_SIZE2 - 1; int sy_down = y + TEMPLATE_SIZE2; - uchar_t up_value = *(__global const uchar_t *)(src + mad24(sy_up, src_step, mad24(cn, sx, src_offset))); - uchar_t down_value = *(__global const uchar_t *)(src + mad24(sy_down, src_step, mad24(cn, sx, src_offset))); + pixel_t up_value = *(__global const pixel_t *)(src + mad24(sy_up, src_step, mad24(psz, sx, src_offset))); + pixel_t down_value = *(__global const pixel_t *)(src + mad24(sy_down, src_step, mad24(psz, sx, src_offset))); sx -= SEARCH_SIZE2; sy_up -= SEARCH_SIZE2; @@ -171,8 +192,8 @@ inline void calcElement(__global const uchar * src, int src_step, int src_offset { int wx = i % SEARCH_SIZE, wy = i / SEARCH_SIZE; - uchar_t up_value_t = *(__global const uchar_t *)(src + mad24(sy_up + wy, src_step, mad24(cn, sx + wx, src_offset))); - uchar_t down_value_t = *(__global const uchar_t *)(src + mad24(sy_down + wy, src_step, mad24(cn, sx + wx, src_offset))); + pixel_t up_value_t = *(__global const pixel_t *)(src + mad24(sy_up + wy, src_step, mad24(psz, sx + wx, src_offset))); + pixel_t down_value_t = *(__global const pixel_t *)(src + mad24(sy_down + wy, src_step, mad24(psz, sx + wx, src_offset))); __global int * col_dists_current = col_dists + mad24(i, TEMPLATE_SIZE, first); __global int * up_col_dists_current = up_col_dists + mad24(x0, SEARCH_SIZE_SQ, i); @@ -186,24 +207,25 @@ inline void calcElement(__global const uchar * src, int src_step, int src_offset } inline void convolveWindow(__global const uchar * src, int src_step, int src_offset, 
- __local int * dists, __global const int * almostDist2Weight, + __local int * dists, __global const wlut_t * almostDist2Weight, __global uchar * dst, int dst_step, int dst_offset, - int y, int x, int id, __local int * weights_local, - __local int_t * weighted_sum_local, int almostTemplateWindowSizeSqBinShift) + int y, int x, int id, __local weight_t * weights_local, + __local sum_t * weighted_sum_local, int almostTemplateWindowSizeSqBinShift) { - int sx = x - SEARCH_SIZE2, sy = y - SEARCH_SIZE2, weights = 0; - int_t weighted_sum = (int_t)(0); + int sx = x - SEARCH_SIZE2, sy = y - SEARCH_SIZE2; + weight_t weights = (weight_t)0; + sum_t weighted_sum = (sum_t)0; for (int i = id; i < SEARCH_SIZE_SQ; i += CTA_SIZE) { - int src_index = mad24(sy + i / SEARCH_SIZE, src_step, mad24(i % SEARCH_SIZE + sx, cn, src_offset)); - int_t src_value = convert_int_t(*(__global const uchar_t *)(src + src_index)); + int src_index = mad24(sy + i / SEARCH_SIZE, src_step, mad24(i % SEARCH_SIZE + sx, psz, src_offset)); + sum_t src_value = convert_sum_t(*(__global const pixel_t *)(src + src_index)); int almostAvgDist = dists[i] >> almostTemplateWindowSizeSqBinShift; - int weight = almostDist2Weight[almostAvgDist]; + weight_t weight = convert_weight_t(almostDist2Weight[almostAvgDist]); weights += weight; - weighted_sum += (int_t)(weight) * src_value; + weighted_sum += (sum_t)weight * src_value; } weights_local[id] = weights; @@ -223,26 +245,27 @@ inline void convolveWindow(__global const uchar * src, int src_step, int src_off if (id == 0) { - int dst_index = mad24(y, dst_step, mad24(cn, x, dst_offset)); - int_t weighted_sum_local_0 = weighted_sum_local[0] + weighted_sum_local[1] + + int dst_index = mad24(y, dst_step, mad24(psz, x, dst_offset)); + sum_t weighted_sum_local_0 = weighted_sum_local[0] + weighted_sum_local[1] + weighted_sum_local[2] + weighted_sum_local[3]; - int weights_local_0 = weights_local[0] + weights_local[1] + weights_local[2] + weights_local[3]; + weight_t weights_local_0 
= weights_local[0] + weights_local[1] + weights_local[2] + weights_local[3]; - *(__global uchar_t *)(dst + dst_index) = convert_uchar_t(weighted_sum_local_0 / (int_t)(weights_local_0)); + *(__global pixel_t *)(dst + dst_index) = convert_pixel_t(weighted_sum_local_0 / (sum_t)weights_local_0); } } __kernel void fastNlMeansDenoising(__global const uchar * src, int src_step, int src_offset, __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, - __global const int * almostDist2Weight, __global uchar * buffer, + __global const wlut_t * almostDist2Weight, __global uchar * buffer, int almostTemplateWindowSizeSqBinShift) { int block_x = get_group_id(0), nblocks_x = get_num_groups(0); int block_y = get_group_id(1); int id = get_local_id(0), first; - __local int dists[SEARCH_SIZE_SQ], weights[CTA_SIZE]; - __local int_t weighted_sum[CTA_SIZE]; + __local int dists[SEARCH_SIZE_SQ]; + __local weight_t weights[CTA_SIZE]; + __local sum_t weighted_sum[CTA_SIZE]; int x0 = block_x * BLOCK_COLS, x1 = min(x0 + BLOCK_COLS, dst_cols); int y0 = block_y * BLOCK_ROWS, y1 = min(y0 + BLOCK_ROWS, dst_rows); diff --git a/modules/photo/test/ocl/test_denoising.cpp b/modules/photo/test/ocl/test_denoising.cpp index cb2d74f85..f749564c6 100644 --- a/modules/photo/test/ocl/test_denoising.cpp +++ b/modules/photo/test/ocl/test_denoising.cpp @@ -13,11 +13,11 @@ namespace cvtest { namespace ocl { -PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool) +PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, int, bool, bool) { - int cn, templateWindowSize, searchWindowSize; - float h; - bool use_roi; + int cn, normType, templateWindowSize, searchWindowSize; + std::vector h; + bool use_roi, use_image; TEST_DECLARE_INPUT_PARAMETER(src); TEST_DECLARE_OUTPUT_PARAMETER(dst); @@ -25,29 +25,46 @@ PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool) virtual void SetUp() { cn = GET_PARAM(0); - use_roi = GET_PARAM(1); + normType = GET_PARAM(1); + use_roi = GET_PARAM(2); + 
use_image = GET_PARAM(3); templateWindowSize = 7; searchWindowSize = 21; - h = 3.0f; + + h.resize(cn); + for (int i=0; i 0 && cn <= 4); + if (cn == 2) { + int from_to[] = { 0,0, 1,1 }; + src_roi.create(roiSize, type); + mixChannels(&image, 1, &src_roi, 1, from_to, 2); + } + else if (cn == 4) { + int from_to[] = { 0,0, 1,1, 2,2, 1,3}; + src_roi.create(roiSize, type); + mixChannels(&image, 1, &src_roi, 1, from_to, 4); + } + else image.copyTo(src_roi); + } Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); randomSubMat(dst, dst_roi, roiSize, dstBorder, type, 0, 255); @@ -65,8 +82,23 @@ OCL_TEST_P(FastNlMeansDenoising, Mat) { generateTestData(); - OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, h, templateWindowSize, searchWindowSize)); - OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, h, templateWindowSize, searchWindowSize)); + OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, std::vector(1, h[0]), templateWindowSize, searchWindowSize, normType)); + OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, std::vector(1, h[0]), templateWindowSize, searchWindowSize, normType)); + + OCL_EXPECT_MATS_NEAR(dst, 1); + } +} + +typedef FastNlMeansDenoisingTestBase FastNlMeansDenoising_hsep; + +OCL_TEST_P(FastNlMeansDenoising_hsep, Mat) +{ + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(); + + OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, h, templateWindowSize, searchWindowSize, normType)); + OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, h, templateWindowSize, searchWindowSize, normType)); OCL_EXPECT_MATS_NEAR(dst, 1); } @@ -80,15 +112,21 @@ OCL_TEST_P(FastNlMeansDenoisingColored, Mat) { generateTestData(); - OCL_OFF(cv::fastNlMeansDenoisingColored(src_roi, dst_roi, h, h, templateWindowSize, searchWindowSize)); - OCL_ON(cv::fastNlMeansDenoisingColored(usrc_roi, udst_roi, h, h, templateWindowSize, searchWindowSize)); + OCL_OFF(cv::fastNlMeansDenoisingColored(src_roi, dst_roi, h[0], h[0], templateWindowSize, searchWindowSize)); + 
OCL_ON(cv::fastNlMeansDenoisingColored(usrc_roi, udst_roi, h[0], h[0], templateWindowSize, searchWindowSize)); OCL_EXPECT_MATS_NEAR(dst, 1); } } -OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising, Combine(Values(1, 2), Bool())); -OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoisingColored, Combine(Values(3, 4), Bool())); +OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising, + Combine(Values(1, 2, 3, 4), Values((int)NORM_L2, (int)NORM_L1), + Bool(), Values(true))); +OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising_hsep, + Combine(Values(1, 2, 3, 4), Values((int)NORM_L2, (int)NORM_L1), + Bool(), Values(true))); +OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoisingColored, + Combine(Values(3, 4), Values((int)NORM_L2), Bool(), Values(false))); } } // namespace cvtest::ocl