From e178294b49ce36938fd275d3309db6110698b492 Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Thu, 12 Feb 2015 15:23:28 +0100 Subject: [PATCH 01/40] Refactoring in preparation for 16-bit implementation of fastNlMeansDenoising --- modules/photo/src/denoising.cpp | 12 +- .../src/fast_nlmeans_denoising_invoker.hpp | 86 +++---- ...fast_nlmeans_denoising_invoker_commons.hpp | 218 ++++++++++-------- .../fast_nlmeans_multi_denoising_invoker.hpp | 93 ++++---- 4 files changed, 222 insertions(+), 187 deletions(-) diff --git a/modules/photo/src/denoising.cpp b/modules/photo/src/denoising.cpp index a074ac136..724ea0eb0 100644 --- a/modules/photo/src/denoising.cpp +++ b/modules/photo/src/denoising.cpp @@ -65,17 +65,17 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h, switch (src.type()) { case CV_8U: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( + FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC2: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( + FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC3: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( + FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, h)); break; default: @@ -175,19 +175,19 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds { case CV_8U: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC2: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC3: parallel_for_(cv::Range(0, srcImgs[0].rows), - 
FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, h)); break; diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp index b8f5a0392..2ad0189ef 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp @@ -50,7 +50,7 @@ using namespace cv; -template +template struct FastNlMeansDenoisingInvoker : public ParallelLoopBody { @@ -75,20 +75,20 @@ private: int template_window_half_size_; int search_window_half_size_; - int fixed_point_mult_; + IT fixed_point_mult_; int almost_template_window_size_sq_bin_shift_; - std::vector almost_dist2weight_; + std::vector almost_dist2weight_; void calcDistSumsForFirstElementInRow( - int i, Array2d& dist_sums, - Array3d& col_dist_sums, - Array3d& up_col_dist_sums) const; + int i, Array2d& dist_sums, + Array3d& col_dist_sums, + Array3d& up_col_dist_sums) const; void calcDistSumsForElementInFirstRow( int i, int j, int first_col_num, - Array2d& dist_sums, - Array3d& col_dist_sums, - Array3d& up_col_dist_sums) const; + Array2d& dist_sums, + Array3d& col_dist_sums, + Array3d& up_col_dist_sums) const; }; inline int getNearestPowerOf2(int value) @@ -99,8 +99,8 @@ inline int getNearestPowerOf2(int value) return p; } -template -FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( +template +FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( const Mat& src, Mat& dst, int template_window_size, int search_window_size, @@ -117,8 +117,8 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( border_size_ = search_window_half_size_ + template_window_half_size_; copyMakeBorder(src_, extended_src_, border_size_, border_size_, border_size_, border_size_, BORDER_DEFAULT); - const int max_estimate_sum_value = search_window_size_ * search_window_size_ * 255; - fixed_point_mult_ = 
std::numeric_limits::max() / max_estimate_sum_value; + const IT max_estimate_sum_value = (IT)search_window_size_ * (IT)search_window_size_ * 255; + fixed_point_mult_ = std::numeric_limits::max() / max_estimate_sum_value; // precalc weight for every possible l2 dist between blocks // additional optimization of precalced weights to replace division(averaging) by binary shift @@ -127,7 +127,7 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq); double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq; - int max_dist = 255 * 255 * sizeof(T); + IT max_dist = 255 * 255 * sizeof(T); int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1); almost_dist2weight_.resize(almost_max_dist); @@ -135,7 +135,7 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) { double dist = almost_dist * almost_dist2actual_dist_multiplier; - int weight = cvRound(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T)))); + IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T)))); if (weight < WEIGHT_THRESHOLD * fixed_point_mult_) weight = 0; @@ -149,21 +149,21 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( dst_ = Mat::zeros(src_.size(), src_.type()); } -template -void FastNlMeansDenoisingInvoker::operator() (const Range& range) const +template +void FastNlMeansDenoisingInvoker::operator() (const Range& range) const { int row_from = range.start; int row_to = range.end - 1; // sums of cols anf rows for current pixel p - Array2d dist_sums(search_window_size_, search_window_size_); + Array2d dist_sums(search_window_size_, search_window_size_); // for lazy calc optimization (sum of cols for current pixel) - Array3d col_dist_sums(template_window_size_, search_window_size_, search_window_size_); + 
Array3d col_dist_sums(template_window_size_, search_window_size_, search_window_size_); int first_col_num = -1; // last elements of column sum (for each element in row) - Array3d up_col_dist_sums(src_.cols, search_window_size_, search_window_size_); + Array3d up_col_dist_sums(src_.cols, search_window_size_, search_window_size_); for (int i = row_from; i <= row_to; i++) { @@ -202,9 +202,9 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) const for (int y = 0; y < search_window_size; y++) { - int * dist_sums_row = dist_sums.row_ptr(y); - int * col_dist_sums_row = col_dist_sums.row_ptr(first_col_num, y); - int * up_col_dist_sums_row = up_col_dist_sums.row_ptr(j, y); + IT * dist_sums_row = dist_sums.row_ptr(y); + IT * col_dist_sums_row = col_dist_sums.row_ptr(first_col_num, y); + IT * up_col_dist_sums_row = up_col_dist_sums.row_ptr(j, y); const T * b_up_ptr = extended_src_.ptr(start_by - template_window_half_size_ - 1 + y); const T * b_down_ptr = extended_src_.ptr(start_by + template_window_half_size_ + y); @@ -215,7 +215,7 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) const dist_sums_row[x] -= col_dist_sums_row[x]; int bx = start_bx + x; - col_dist_sums_row[x] = up_col_dist_sums_row[x] + calcUpDownDist(a_up, a_down, b_up_ptr[bx], b_down_ptr[bx]); + col_dist_sums_row[x] = up_col_dist_sums_row[x] + calcUpDownDist(a_up, a_down, b_up_ptr[bx], b_down_ptr[bx]); dist_sums_row[x] += col_dist_sums_row[x]; up_col_dist_sums_row[x] = col_dist_sums_row[x]; @@ -227,39 +227,39 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) const } // calc weights - int estimation[3], weights_sum = 0; + IT estimation[3], weights_sum = 0; for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++) estimation[channel_num] = 0; for (int y = 0; y < search_window_size_; y++) { const T* cur_row_ptr = extended_src_.ptr(border_size_ + search_window_y + y); - int* dist_sums_row = dist_sums.row_ptr(y); + IT* dist_sums_row = 
dist_sums.row_ptr(y); for (int x = 0; x < search_window_size_; x++) { - int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_; - int weight = almost_dist2weight_[almostAvgDist]; + int almostAvgDist = (int)(dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_); + IT weight = almost_dist2weight_[almostAvgDist]; weights_sum += weight; T p = cur_row_ptr[border_size_ + search_window_x + x]; - incWithWeight(estimation, weight, p); + incWithWeight(estimation, weight, p); } } for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++) - estimation[channel_num] = ((unsigned)estimation[channel_num] + weights_sum/2) / weights_sum; + estimation[channel_num] = (static_cast(estimation[channel_num]) + weights_sum/2) / weights_sum; - dst_.at(i,j) = saturateCastFromArray(estimation); + dst_.at(i,j) = saturateCastFromArray(estimation); } } } -template -inline void FastNlMeansDenoisingInvoker::calcDistSumsForFirstElementInRow( +template +inline void FastNlMeansDenoisingInvoker::calcDistSumsForFirstElementInRow( int i, - Array2d& dist_sums, - Array3d& col_dist_sums, - Array3d& up_col_dist_sums) const + Array2d& dist_sums, + Array3d& col_dist_sums, + Array3d& up_col_dist_sums) const { int j = 0; @@ -276,7 +276,7 @@ inline void FastNlMeansDenoisingInvoker::calcDistSumsForFirstElementInRow( for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) for (int tx = -template_window_half_size_; tx <= template_window_half_size_; tx++) { - int dist = calcDist(extended_src_, + int dist = calcDist(extended_src_, border_size_ + i + ty, border_size_ + j + tx, border_size_ + start_y + ty, border_size_ + start_x + tx); @@ -288,12 +288,12 @@ inline void FastNlMeansDenoisingInvoker::calcDistSumsForFirstElementInRow( } } -template -inline void FastNlMeansDenoisingInvoker::calcDistSumsForElementInFirstRow( +template +inline void FastNlMeansDenoisingInvoker::calcDistSumsForElementInFirstRow( int i, int j, int first_col_num, - Array2d& 
dist_sums, - Array3d& col_dist_sums, - Array3d& up_col_dist_sums) const + Array2d& dist_sums, + Array3d& col_dist_sums, + Array3d& up_col_dist_sums) const { int ay = border_size_ + i; int ax = border_size_ + j + template_window_half_size_; @@ -312,7 +312,7 @@ inline void FastNlMeansDenoisingInvoker::calcDistSumsForElementInFirstRow( int by = start_by + y; int bx = start_bx + x; for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) - col_dist_sums[new_last_col_num][y][x] += calcDist(extended_src_, ay + ty, ax, by + ty, bx); + col_dist_sums[new_last_col_num][y][x] += calcDist(extended_src_, ay + ty, ax, by + ty, bx); dist_sums[y][x] += col_dist_sums[new_last_col_num][y][x]; up_col_dist_sums[j][y][x] = col_dist_sums[new_last_col_num][y][x]; diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp index ab7db5d2d..e4e0a3a59 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp @@ -44,118 +44,152 @@ using namespace cv; -template static inline int calcDist(const T a, const T b); - -template <> inline int calcDist(const uchar a, const uchar b) +template struct calcDist_ { - return (a-b) * (a-b); + static inline IT f(const T a, const T b); +}; + +template struct calcDist_ +{ + static inline IT f(uchar a, uchar b) + { + return (IT)(a-b) * (IT)(a-b); + } +}; + +template struct calcDist_ +{ + static inline IT f(const Vec2b a, const Vec2b b) + { + return (IT)(a[0]-b[0])*(IT)(a[0]-b[0]) + (IT)(a[1]-b[1])*(IT)(a[1]-b[1]); + } +}; + +template struct calcDist_ +{ + static inline IT f(const Vec3b a, const Vec3b b) + { + return + (IT)(a[0]-b[0])*(IT)(a[0]-b[0]) + + (IT)(a[1]-b[1])*(IT)(a[1]-b[1]) + + (IT)(a[2]-b[2])*(IT)(a[2]-b[2]); + } +}; + +template static inline IT calcDist(const T a, const T b) +{ + return calcDist_::f(a, b); } -template <> inline int calcDist(const Vec2b a, const 
Vec2b b) -{ - return (a[0]-b[0])*(a[0]-b[0]) + (a[1]-b[1])*(a[1]-b[1]); -} - -template <> inline int calcDist(const Vec3b a, const Vec3b b) -{ - return (a[0]-b[0])*(a[0]-b[0]) + (a[1]-b[1])*(a[1]-b[1]) + (a[2]-b[2])*(a[2]-b[2]); -} - -template static inline int calcDist(const Mat& m, int i1, int j1, int i2, int j2) +template +static inline IT calcDist(const Mat& m, int i1, int j1, int i2, int j2) { const T a = m.at(i1, j1); const T b = m.at(i2, j2); - return calcDist(a,b); + return calcDist(a,b); } -template static inline int calcUpDownDist(T a_up, T a_down, T b_up, T b_down) +template struct calcUpDownDist_ { - return calcDist(a_down, b_down) - calcDist(a_up, b_up); + static inline IT f(T a_up, T a_down, T b_up, T b_down) + { + return calcDist(a_down, b_down) - calcDist(a_up, b_up); + } +}; + +template struct calcUpDownDist_ +{ + static inline IT f(uchar a_up, uchar a_down, uchar b_up, uchar b_down) + { + IT A = a_down - b_down; + IT B = a_up - b_up; + return (A-B)*(A+B); + } +}; + +template +static inline IT calcUpDownDist(T a_up, T a_down, T b_up, T b_down) +{ + return calcUpDownDist_::f(a_up, a_down, b_up, b_down); +}; + +template struct incWithWeight_ +{ + static inline void f(IT* estimation, IT weight, T p); +}; + +template struct incWithWeight_ +{ + static inline void f(IT* estimation, IT weight, uchar p) + { + estimation[0] += weight * p; + } +}; + +template struct incWithWeight_ +{ + static inline void f(IT* estimation, IT weight, Vec2b p) + { + estimation[0] += weight * p[0]; + estimation[1] += weight * p[1]; + } +}; + +template struct incWithWeight_ +{ + static inline void f(IT* estimation, IT weight, Vec3b p) + { + estimation[0] += weight * p[0]; + estimation[1] += weight * p[1]; + estimation[2] += weight * p[2]; + } +}; + +template +static inline void incWithWeight(IT* estimation, IT weight, T p) +{ + return incWithWeight_::f(estimation, weight, p); } -template <> inline int calcUpDownDist(uchar a_up, uchar a_down, uchar b_up, uchar b_down) +template 
struct saturateCastFromArray_ { - int A = a_down - b_down; - int B = a_up - b_up; - return (A-B)*(A+B); -} + static inline T f(IT* estimation); +}; -template static inline void incWithWeight(int* estimation, int weight, T p); - -template <> inline void incWithWeight(int* estimation, int weight, uchar p) +template struct saturateCastFromArray_ { - estimation[0] += weight * p; -} + static inline uchar f(IT* estimation) + { + return saturate_cast(estimation[0]); + } +}; -template <> inline void incWithWeight(int* estimation, int weight, Vec2b p) +template struct saturateCastFromArray_ { - estimation[0] += weight * p[0]; - estimation[1] += weight * p[1]; -} + static inline Vec2b f(IT* estimation) + { + Vec2b res; + res[0] = saturate_cast(estimation[0]); + res[1] = saturate_cast(estimation[1]); + return res; + } +}; -template <> inline void incWithWeight(int* estimation, int weight, Vec3b p) +template struct saturateCastFromArray_ { - estimation[0] += weight * p[0]; - estimation[1] += weight * p[1]; - estimation[2] += weight * p[2]; -} + static inline Vec3b f(IT* estimation) + { + Vec3b res; + res[0] = saturate_cast(estimation[0]); + res[1] = saturate_cast(estimation[1]); + res[2] = saturate_cast(estimation[2]); + return res; + } +}; -template <> inline void incWithWeight(int* estimation, int weight, int p) +template static inline T saturateCastFromArray(IT* estimation) { - estimation[0] += weight * p; -} - -template <> inline void incWithWeight(int* estimation, int weight, Vec2i p) -{ - estimation[0] += weight * p[0]; - estimation[1] += weight * p[1]; -} - -template <> inline void incWithWeight(int* estimation, int weight, Vec3i p) -{ - estimation[0] += weight * p[0]; - estimation[1] += weight * p[1]; - estimation[2] += weight * p[2]; -} - -template static inline T saturateCastFromArray(int* estimation); - -template <> inline uchar saturateCastFromArray(int* estimation) -{ - return saturate_cast(estimation[0]); -} - -template <> inline Vec2b saturateCastFromArray(int* 
estimation) -{ - Vec2b res; - res[0] = saturate_cast(estimation[0]); - res[1] = saturate_cast(estimation[1]); - return res; -} - -template <> inline Vec3b saturateCastFromArray(int* estimation) -{ - Vec3b res; - res[0] = saturate_cast(estimation[0]); - res[1] = saturate_cast(estimation[1]); - res[2] = saturate_cast(estimation[2]); - return res; -} - -template <> inline int saturateCastFromArray(int* estimation) -{ - return estimation[0]; -} - -template <> inline Vec2i saturateCastFromArray(int* estimation) -{ - estimation[1] = 0; - return Vec2i(estimation); -} - -template <> inline Vec3i saturateCastFromArray(int* estimation) -{ - return Vec3i(estimation); + return saturateCastFromArray_::f(estimation); } #endif diff --git a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp index 191a67127..392733c08 100644 --- a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp @@ -50,7 +50,7 @@ using namespace cv; -template +template struct FastNlMeansMultiDenoisingInvoker : ParallelLoopBody { @@ -81,21 +81,21 @@ private: int search_window_half_size_; int temporal_window_half_size_; - int fixed_point_mult_; + IT fixed_point_mult_; int almost_template_window_size_sq_bin_shift; - std::vector almost_dist2weight; + std::vector almost_dist2weight; - void calcDistSumsForFirstElementInRow(int i, Array3d& dist_sums, - Array4d& col_dist_sums, - Array4d& up_col_dist_sums) const; + void calcDistSumsForFirstElementInRow(int i, Array3d& dist_sums, + Array4d& col_dist_sums, + Array4d& up_col_dist_sums) const; void calcDistSumsForElementInFirstRow(int i, int j, int first_col_num, - Array3d& dist_sums, Array4d& col_dist_sums, - Array4d& up_col_dist_sums) const; + Array3d& dist_sums, Array4d& col_dist_sums, + Array4d& up_col_dist_sums) const; }; -template -FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( +template 
+FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( const std::vector& srcImgs, int imgToDenoiseIndex, int temporalWindowSize, @@ -125,8 +125,9 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( border_size_, border_size_, border_size_, border_size_, cv::BORDER_DEFAULT); main_extended_src_ = extended_srcs_[temporal_window_half_size_]; - const int max_estimate_sum_value = temporal_window_size_ * search_window_size_ * search_window_size_ * 255; - fixed_point_mult_ = std::numeric_limits::max() / max_estimate_sum_value; + const IT max_estimate_sum_value = + (IT)temporal_window_size_ * (IT)search_window_size_ * (IT)search_window_size_ * 255; + fixed_point_mult_ = std::numeric_limits::max() / max_estimate_sum_value; // precalc weight for every possible l2 dist between blocks // additional optimization of precalced weights to replace division(averaging) by binary shift @@ -138,7 +139,7 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift; double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq; - int max_dist = 255 * 255 * sizeof(T); + IT max_dist = 255 * 255 * sizeof(T); int almost_max_dist = (int) (max_dist / almost_dist2actual_dist_multiplier + 1); almost_dist2weight.resize(almost_max_dist); @@ -146,7 +147,7 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) { double dist = almost_dist * almost_dist2actual_dist_multiplier; - int weight = cvRound(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T)))); + IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T)))); if (weight < WEIGHT_THRESHOLD * fixed_point_mult_) weight = 0; @@ -160,19 +161,19 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( dst_ = Mat::zeros(srcImgs[0].size(), 
srcImgs[0].type()); } -template -void FastNlMeansMultiDenoisingInvoker::operator() (const Range& range) const +template +void FastNlMeansMultiDenoisingInvoker::operator() (const Range& range) const { int row_from = range.start; int row_to = range.end - 1; - Array3d dist_sums(temporal_window_size_, search_window_size_, search_window_size_); + Array3d dist_sums(temporal_window_size_, search_window_size_, search_window_size_); // for lazy calc optimization - Array4d col_dist_sums(template_window_size_, temporal_window_size_, search_window_size_, search_window_size_); + Array4d col_dist_sums(template_window_size_, temporal_window_size_, search_window_size_, search_window_size_); int first_col_num = -1; - Array4d up_col_dist_sums(cols_, temporal_window_size_, search_window_size_, search_window_size_); + Array4d up_col_dist_sums(cols_, temporal_window_size_, search_window_size_, search_window_size_); for (int i = row_from; i <= row_to; i++) { @@ -216,15 +217,15 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& range) const for (int d = 0; d < temporal_window_size_; d++) { Mat cur_extended_src = extended_srcs_[d]; - Array2d cur_dist_sums = dist_sums[d]; - Array2d cur_col_dist_sums = col_dist_sums[first_col_num][d]; - Array2d cur_up_col_dist_sums = up_col_dist_sums[j][d]; + Array2d cur_dist_sums = dist_sums[d]; + Array2d cur_col_dist_sums = col_dist_sums[first_col_num][d]; + Array2d cur_up_col_dist_sums = up_col_dist_sums[j][d]; for (int y = 0; y < search_window_size; y++) { - int* dist_sums_row = cur_dist_sums.row_ptr(y); + IT* dist_sums_row = cur_dist_sums.row_ptr(y); - int* col_dist_sums_row = cur_col_dist_sums.row_ptr(y); - int* up_col_dist_sums_row = cur_up_col_dist_sums.row_ptr(y); + IT* col_dist_sums_row = cur_col_dist_sums.row_ptr(y); + IT* up_col_dist_sums_row = cur_up_col_dist_sums.row_ptr(y); const T* b_up_ptr = cur_extended_src.ptr(start_by - template_window_half_size_ - 1 + y); const T* b_down_ptr = cur_extended_src.ptr(start_by + 
template_window_half_size_ + y); @@ -234,7 +235,7 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& range) const dist_sums_row[x] -= col_dist_sums_row[x]; col_dist_sums_row[x] = up_col_dist_sums_row[x] + - calcUpDownDist(a_up, a_down, b_up_ptr[start_bx + x], b_down_ptr[start_bx + x]); + calcUpDownDist(a_up, a_down, b_up_ptr[start_bx + x], b_down_ptr[start_bx + x]); dist_sums_row[x] += col_dist_sums_row[x]; up_col_dist_sums_row[x] = col_dist_sums_row[x]; @@ -247,9 +248,9 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& range) const } // calc weights - int weights_sum = 0; + IT weights_sum = 0; - int estimation[3]; + IT estimation[3]; for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++) estimation[channel_num] = 0; @@ -260,33 +261,33 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& range) const { const T* cur_row_ptr = esrc_d.ptr(border_size_ + search_window_y + y); - int* dist_sums_row = dist_sums.row_ptr(d, y); + IT* dist_sums_row = dist_sums.row_ptr(d, y); for (int x = 0; x < search_window_size_; x++) { - int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift; + int almostAvgDist = (int)(dist_sums_row[x] >> almost_template_window_size_sq_bin_shift); - int weight = almost_dist2weight[almostAvgDist]; + IT weight = almost_dist2weight[almostAvgDist]; weights_sum += weight; T p = cur_row_ptr[border_size_ + search_window_x + x]; - incWithWeight(estimation, weight, p); + incWithWeight(estimation, weight, p); } } } for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++) - estimation[channel_num] = ((unsigned)estimation[channel_num] + weights_sum / 2) / weights_sum; + estimation[channel_num] = (static_cast(estimation[channel_num]) + weights_sum / 2) / weights_sum; // ???? 
- dst_.at(i,j) = saturateCastFromArray(estimation); + dst_.at(i,j) = saturateCastFromArray(estimation); } } } -template -inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForFirstElementInRow( - int i, Array3d& dist_sums, Array4d& col_dist_sums, Array4d& up_col_dist_sums) const +template +inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForFirstElementInRow( + int i, Array3d& dist_sums, Array4d& col_dist_sums, Array4d& up_col_dist_sums) const { int j = 0; @@ -303,14 +304,14 @@ inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForFirstElementInRo int start_y = i + y - search_window_half_size_; int start_x = j + x - search_window_half_size_; - int* dist_sums_ptr = &dist_sums[d][y][x]; - int* col_dist_sums_ptr = &col_dist_sums[0][d][y][x]; + IT* dist_sums_ptr = &dist_sums[d][y][x]; + IT* col_dist_sums_ptr = &col_dist_sums[0][d][y][x]; int col_dist_sums_step = col_dist_sums.step_size(0); for (int tx = -template_window_half_size_; tx <= template_window_half_size_; tx++) { for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) { - int dist = calcDist( + IT dist = calcDist( main_extended_src_.at(border_size_ + i + ty, border_size_ + j + tx), cur_extended_src.at(border_size_ + start_y + ty, border_size_ + start_x + tx)); @@ -325,10 +326,10 @@ inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForFirstElementInRo } } -template -inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForElementInFirstRow( - int i, int j, int first_col_num, Array3d& dist_sums, - Array4d& col_dist_sums, Array4d& up_col_dist_sums) const +template +inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForElementInFirstRow( + int i, int j, int first_col_num, Array3d& dist_sums, + Array4d& col_dist_sums, Array4d& up_col_dist_sums) const { int ay = border_size_ + i; int ax = border_size_ + j + template_window_half_size_; @@ -350,10 +351,10 @@ inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForElementInFirstRo int by = 
start_by + y; int bx = start_bx + x; - int* col_dist_sums_ptr = &col_dist_sums[new_last_col_num][d][y][x]; + IT* col_dist_sums_ptr = &col_dist_sums[new_last_col_num][d][y][x]; for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) { - *col_dist_sums_ptr += calcDist( + *col_dist_sums_ptr += calcDist( main_extended_src_.at(ay + ty, ax), cur_extended_src.at(by + ty, bx)); } From 8368fb9ea8dc03a5d09a3d701858ee272a9c818a Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Thu, 12 Feb 2015 18:45:09 +0100 Subject: [PATCH 02/40] Additional refactoring preparing for 16-bit implementation --- .../src/fast_nlmeans_denoising_invoker.hpp | 12 +- ...fast_nlmeans_denoising_invoker_commons.hpp | 113 +++++++++++------- .../fast_nlmeans_multi_denoising_invoker.hpp | 9 +- 3 files changed, 83 insertions(+), 51 deletions(-) diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp index 2ad0189ef..202e36013 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp @@ -107,7 +107,7 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( const float h) : src_(src), dst_(dst) { - CV_Assert(src.channels() == sizeof(T)); //T is Vec1b or Vec2b or Vec3b + CV_Assert(src.channels() == pixelInfo::channels); template_window_half_size_ = template_window_size / 2; search_window_half_size_ = search_window_size / 2; @@ -117,17 +117,21 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( border_size_ = search_window_half_size_ + template_window_half_size_; copyMakeBorder(src_, extended_src_, border_size_, border_size_, border_size_, border_size_, BORDER_DEFAULT); - const IT max_estimate_sum_value = (IT)search_window_size_ * (IT)search_window_size_ * 255; + const IT max_estimate_sum_value = + (IT)search_window_size_ * (IT)search_window_size_ * (IT)pixelInfo::sampleMax(); fixed_point_mult_ = std::numeric_limits::max() / 
max_estimate_sum_value; // precalc weight for every possible l2 dist between blocks // additional optimization of precalced weights to replace division(averaging) by binary shift + // squared distances are truncated to 16 bits to get a reasonable table size CV_Assert(template_window_size_ <= 46340); // sqrt(INT_MAX) int template_window_size_sq = template_window_size_ * template_window_size_; - almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq); + almost_template_window_size_sq_bin_shift_ = + getNearestPowerOf2(template_window_size_sq) + 2*pixelInfo::sampleBits() - 16; double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq; - IT max_dist = 255 * 255 * sizeof(T); + IT max_dist = + (IT)pixelInfo::sampleMax() * (IT)pixelInfo::sampleMax() * (IT)pixelInfo::channels; int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1); almost_dist2weight_.resize(almost_max_dist); diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp index e4e0a3a59..0a8713b91 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp @@ -44,30 +44,62 @@ using namespace cv; -template struct calcDist_ +template struct pixelInfo_ { - static inline IT f(const T a, const T b); + static const int channels = 1; + typedef T sampleType; }; -template struct calcDist_ +template struct pixelInfo_ > { - static inline IT f(uchar a, uchar b) + static const int channels = n; + typedef ET sampleType; +}; + +template struct pixelInfo: public pixelInfo_ +{ + using typename pixelInfo_::sampleType; + + static inline sampleType sampleMax() + { + return std::numeric_limits::max(); + } + + static inline sampleType sampleMin() + { + return std::numeric_limits::min(); + } + + static inline size_t sampleBytes() + { + return 
sizeof(sampleType); + } + + static inline size_t sampleBits() + { + return 8*sampleBytes(); + } +}; + +template struct calcDist_ +{ + static inline IT f(const T a, const T b) { return (IT)(a-b) * (IT)(a-b); } }; -template struct calcDist_ +template struct calcDist_, IT> { - static inline IT f(const Vec2b a, const Vec2b b) + static inline IT f(const Vec a, const Vec b) { return (IT)(a[0]-b[0])*(IT)(a[0]-b[0]) + (IT)(a[1]-b[1])*(IT)(a[1]-b[1]); } }; -template struct calcDist_ +template struct calcDist_, IT> { - static inline IT f(const Vec3b a, const Vec3b b) + static inline IT f(const Vec a, const Vec b) { return (IT)(a[0]-b[0])*(IT)(a[0]-b[0]) + @@ -92,14 +124,6 @@ static inline IT calcDist(const Mat& m, int i1, int j1, int i2, int j2) template struct calcUpDownDist_ { static inline IT f(T a_up, T a_down, T b_up, T b_down) - { - return calcDist(a_down, b_down) - calcDist(a_up, b_up); - } -}; - -template struct calcUpDownDist_ -{ - static inline IT f(uchar a_up, uchar a_down, uchar b_up, uchar b_down) { IT A = a_down - b_down; IT B = a_up - b_up; @@ -107,6 +131,17 @@ template struct calcUpDownDist_ } }; +template struct calcUpDownDist_, IT> +{ +private: + typedef Vec T; +public: + static inline IT f(T a_up, T a_down, T b_up, T b_down) + { + return calcDist(a_down, b_down) - calcDist(a_up, b_up); + } +}; + template static inline IT calcUpDownDist(T a_up, T a_down, T b_up, T b_down) { @@ -115,29 +150,24 @@ static inline IT calcUpDownDist(T a_up, T a_down, T b_up, T b_down) template struct incWithWeight_ { - static inline void f(IT* estimation, IT weight, T p); -}; - -template struct incWithWeight_ -{ - static inline void f(IT* estimation, IT weight, uchar p) + static inline void f(IT* estimation, IT weight, T p) { estimation[0] += weight * p; } }; -template struct incWithWeight_ +template struct incWithWeight_, IT> { - static inline void f(IT* estimation, IT weight, Vec2b p) + static inline void f(IT* estimation, IT weight, Vec p) { estimation[0] += weight * p[0]; 
estimation[1] += weight * p[1]; } }; -template struct incWithWeight_ +template struct incWithWeight_, IT> { - static inline void f(IT* estimation, IT weight, Vec3b p) + static inline void f(IT* estimation, IT weight, Vec p) { estimation[0] += weight * p[0]; estimation[1] += weight * p[1]; @@ -153,36 +183,31 @@ static inline void incWithWeight(IT* estimation, IT weight, T p) template struct saturateCastFromArray_ { - static inline T f(IT* estimation); -}; - -template struct saturateCastFromArray_ -{ - static inline uchar f(IT* estimation) + static inline T f(IT* estimation) { - return saturate_cast(estimation[0]); + return saturate_cast(estimation[0]); } }; -template struct saturateCastFromArray_ +template struct saturateCastFromArray_, IT> { - static inline Vec2b f(IT* estimation) + static inline Vec f(IT* estimation) { - Vec2b res; - res[0] = saturate_cast(estimation[0]); - res[1] = saturate_cast(estimation[1]); + Vec res; + res[0] = saturate_cast(estimation[0]); + res[1] = saturate_cast(estimation[1]); return res; } }; -template struct saturateCastFromArray_ +template struct saturateCastFromArray_, IT> { - static inline Vec3b f(IT* estimation) + static inline Vec f(IT* estimation) { - Vec3b res; - res[0] = saturate_cast(estimation[0]); - res[1] = saturate_cast(estimation[1]); - res[2] = saturate_cast(estimation[2]); + Vec res; + res[0] = saturate_cast(estimation[0]); + res[1] = saturate_cast(estimation[1]); + res[2] = saturate_cast(estimation[2]); return res; } }; diff --git a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp index 392733c08..48276b426 100644 --- a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp @@ -106,7 +106,7 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( dst_(dst), extended_srcs_(srcImgs.size()) { CV_Assert(srcImgs.size() > 0); - CV_Assert(srcImgs[0].channels() == sizeof(T)); 
+ CV_Assert(srcImgs[0].channels() == pixelInfo::channels); rows_ = srcImgs[0].rows; cols_ = srcImgs[0].cols; @@ -126,20 +126,23 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( main_extended_src_ = extended_srcs_[temporal_window_half_size_]; const IT max_estimate_sum_value = - (IT)temporal_window_size_ * (IT)search_window_size_ * (IT)search_window_size_ * 255; + (IT)temporal_window_size_ * (IT)search_window_size_ * (IT)search_window_size_ * (IT)pixelInfo::sampleMax(); fixed_point_mult_ = std::numeric_limits::max() / max_estimate_sum_value; // precalc weight for every possible l2 dist between blocks // additional optimization of precalced weights to replace division(averaging) by binary shift + // squared distances are truncated to 16 bits to get a reasonable table size int template_window_size_sq = template_window_size_ * template_window_size_; almost_template_window_size_sq_bin_shift = 0; while (1 << almost_template_window_size_sq_bin_shift < template_window_size_sq) almost_template_window_size_sq_bin_shift++; + almost_template_window_size_sq_bin_shift += 2*pixelInfo::sampleBits() - 16; int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift; double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq; - IT max_dist = 255 * 255 * sizeof(T); + IT max_dist = + (IT)pixelInfo::sampleMax() * (IT)pixelInfo::sampleMax() * (IT)pixelInfo::channels; int almost_max_dist = (int) (max_dist / almost_dist2actual_dist_multiplier + 1); almost_dist2weight.resize(almost_max_dist); From 49e93747b17cae65915c66b326e37a94ddc53190 Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Thu, 12 Feb 2015 22:05:05 +0100 Subject: [PATCH 03/40] Added saturate_cast from int64 and uint64 --- modules/core/include/opencv2/core/base.hpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/modules/core/include/opencv2/core/base.hpp b/modules/core/include/opencv2/core/base.hpp index 
f2acaa3fb..73beb911f 100644 --- a/modules/core/include/opencv2/core/base.hpp +++ b/modules/core/include/opencv2/core/base.hpp @@ -442,6 +442,10 @@ template static inline _Tp saturate_cast(int v) { return _Tp( template static inline _Tp saturate_cast(float v) { return _Tp(v); } /** @overload */ template static inline _Tp saturate_cast(double v) { return _Tp(v); } +/** @overload */ +template static inline _Tp saturate_cast(int64 v) { return _Tp(v); } +/** @overload */ +template static inline _Tp saturate_cast(uint64 v) { return _Tp(v); } //! @cond IGNORED @@ -452,6 +456,8 @@ template<> inline uchar saturate_cast(short v) { return saturate_c template<> inline uchar saturate_cast(unsigned v) { return (uchar)std::min(v, (unsigned)UCHAR_MAX); } template<> inline uchar saturate_cast(float v) { int iv = cvRound(v); return saturate_cast(iv); } template<> inline uchar saturate_cast(double v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline uchar saturate_cast(int64 v) { return (uchar)((uint64)v <= (uint64)UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); } +template<> inline uchar saturate_cast(uint64 v) { return (uchar)std::min(v, (uint64)UCHAR_MAX); } template<> inline schar saturate_cast(uchar v) { return (schar)std::min((int)v, SCHAR_MAX); } template<> inline schar saturate_cast(ushort v) { return (schar)std::min((unsigned)v, (unsigned)SCHAR_MAX); } @@ -460,6 +466,8 @@ template<> inline schar saturate_cast(short v) { return saturate_c template<> inline schar saturate_cast(unsigned v) { return (schar)std::min(v, (unsigned)SCHAR_MAX); } template<> inline schar saturate_cast(float v) { int iv = cvRound(v); return saturate_cast(iv); } template<> inline schar saturate_cast(double v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline schar saturate_cast(int64 v) { return (schar)((uint64)((int64)v-SCHAR_MIN) <= (uint64)UCHAR_MAX ? v : v > 0 ? 
SCHAR_MAX : SCHAR_MIN); } +template<> inline schar saturate_cast(uint64 v) { return (schar)std::min(v, (uint64)SCHAR_MAX); } template<> inline ushort saturate_cast(schar v) { return (ushort)std::max((int)v, 0); } template<> inline ushort saturate_cast(short v) { return (ushort)std::max((int)v, 0); } @@ -467,12 +475,16 @@ template<> inline ushort saturate_cast(int v) { return (ushort)(( template<> inline ushort saturate_cast(unsigned v) { return (ushort)std::min(v, (unsigned)USHRT_MAX); } template<> inline ushort saturate_cast(float v) { int iv = cvRound(v); return saturate_cast(iv); } template<> inline ushort saturate_cast(double v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline ushort saturate_cast(int64 v) { return (ushort)((uint64)v <= (uint64)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); } +template<> inline ushort saturate_cast(uint64 v) { return (ushort)std::min(v, (uint64)USHRT_MAX); } template<> inline short saturate_cast(ushort v) { return (short)std::min((int)v, SHRT_MAX); } template<> inline short saturate_cast(int v) { return (short)((unsigned)(v - SHRT_MIN) <= (unsigned)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); } template<> inline short saturate_cast(unsigned v) { return (short)std::min(v, (unsigned)SHRT_MAX); } template<> inline short saturate_cast(float v) { int iv = cvRound(v); return saturate_cast(iv); } template<> inline short saturate_cast(double v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline short saturate_cast(int64 v) { return (short)((uint64)((int64)v - SHRT_MIN) <= (uint64)USHRT_MAX ? v : v > 0 ? 
SHRT_MAX : SHRT_MIN); } +template<> inline short saturate_cast(uint64 v) { return (short)std::min(v, (uint64)SHRT_MAX); } template<> inline int saturate_cast(float v) { return cvRound(v); } template<> inline int saturate_cast(double v) { return cvRound(v); } From 42db9e7153a6d10b429df0bc2108278251c11ebc Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Thu, 12 Feb 2015 22:14:01 +0100 Subject: [PATCH 04/40] Basic 16-bit implementation of fastNlMeansDenoising. Table-based exponentiation leads to high memory footprint and loss of precision in 16-bit mode. --- modules/photo/src/denoising.cpp | 43 ++++++++++++++++--- .../src/fast_nlmeans_denoising_invoker.hpp | 14 +++--- .../fast_nlmeans_multi_denoising_invoker.hpp | 15 ++++--- 3 files changed, 55 insertions(+), 17 deletions(-) diff --git a/modules/photo/src/denoising.cpp b/modules/photo/src/denoising.cpp index 724ea0eb0..0abeefe5b 100644 --- a/modules/photo/src/denoising.cpp +++ b/modules/photo/src/denoising.cpp @@ -65,17 +65,32 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h, switch (src.type()) { case CV_8U: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( + FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC2: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( + FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC3: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_16U: + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_16UC2: + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker, int64, uint64>( + src, dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_16UC3: + parallel_for_(cv::Range(0, src.rows), + 
FastNlMeansDenoisingInvoker, int64, uint64>( src, dst, templateWindowSize, searchWindowSize, h)); break; default: @@ -181,13 +196,31 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds break; case CV_8UC2: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC3: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_16U: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_16UC2: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, int64, uint64>( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_16UC3: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, int64, uint64>( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, h)); break; diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp index 202e36013..27a016ae9 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp @@ -123,11 +123,13 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( // precalc weight for every possible l2 dist between blocks // additional optimization of precalced weights to replace division(averaging) by binary shift - // squared distances are truncated to 16 bits to get a reasonable table size + // squared distances are truncated to 24 bits to avoid 
unreasonable table sizes + // TODO: uses lots of memory and loses precision wtih 16-bit images ???? + const size_t TABLE_MAX_BITS = 24; CV_Assert(template_window_size_ <= 46340); // sqrt(INT_MAX) int template_window_size_sq = template_window_size_ * template_window_size_; - almost_template_window_size_sq_bin_shift_ = - getNearestPowerOf2(template_window_size_sq) + 2*pixelInfo::sampleBits() - 16; + almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq) + + std::max(2*pixelInfo::sampleBits(), TABLE_MAX_BITS) - TABLE_MAX_BITS; double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq; IT max_dist = @@ -139,7 +141,7 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) { double dist = almost_dist * almost_dist2actual_dist_multiplier; - IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T)))); + IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * pixelInfo::channels))); if (weight < WEIGHT_THRESHOLD * fixed_point_mult_) weight = 0; @@ -232,7 +234,7 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) co // calc weights IT estimation[3], weights_sum = 0; - for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++) + for (size_t channel_num = 0; channel_num < pixelInfo::channels; channel_num++) estimation[channel_num] = 0; for (int y = 0; y < search_window_size_; y++) @@ -250,7 +252,7 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) co } } - for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++) + for (size_t channel_num = 0; channel_num < pixelInfo::channels; channel_num++) estimation[channel_num] = (static_cast(estimation[channel_num]) + weights_sum/2) / weights_sum; dst_.at(i,j) = saturateCastFromArray(estimation); diff --git a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp 
b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp index 48276b426..c90249b82 100644 --- a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp @@ -131,12 +131,15 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( // precalc weight for every possible l2 dist between blocks // additional optimization of precalced weights to replace division(averaging) by binary shift - // squared distances are truncated to 16 bits to get a reasonable table size + // squared distances are truncated to 24 bits to avoid unreasonable table sizes + // TODO: uses lots of memory and loses precision wtih 16-bit images ???? + const size_t TABLE_MAX_BITS = 24; int template_window_size_sq = template_window_size_ * template_window_size_; almost_template_window_size_sq_bin_shift = 0; while (1 << almost_template_window_size_sq_bin_shift < template_window_size_sq) almost_template_window_size_sq_bin_shift++; - almost_template_window_size_sq_bin_shift += 2*pixelInfo::sampleBits() - 16; + almost_template_window_size_sq_bin_shift += + std::max(2*pixelInfo::sampleBits(), TABLE_MAX_BITS) - TABLE_MAX_BITS; int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift; double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq; @@ -150,7 +153,7 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) { double dist = almost_dist * almost_dist2actual_dist_multiplier; - IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T)))); + IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * pixelInfo::channels))); if (weight < WEIGHT_THRESHOLD * fixed_point_mult_) weight = 0; @@ -254,7 +257,7 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& rang IT weights_sum = 0; IT estimation[3]; - for (size_t channel_num 
= 0; channel_num < sizeof(T); channel_num++) + for (size_t channel_num = 0; channel_num < pixelInfo::channels; channel_num++) estimation[channel_num] = 0; for (int d = 0; d < temporal_window_size_; d++) @@ -279,8 +282,8 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& rang } } - for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++) - estimation[channel_num] = (static_cast(estimation[channel_num]) + weights_sum / 2) / weights_sum; // ???? + for (size_t channel_num = 0; channel_num < pixelInfo::channels; channel_num++) + estimation[channel_num] = (static_cast(estimation[channel_num]) + weights_sum / 2) / weights_sum; dst_.at(i,j) = saturateCastFromArray(estimation); From d588c717da1ad2b77e03b058a281da3c00ba0327 Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Fri, 13 Feb 2015 00:11:30 +0100 Subject: [PATCH 05/40] Using WEIGHT_THRESHOLD to limit table size. Still problematic with 16-bit and big h-values. --- .../src/fast_nlmeans_denoising_invoker.hpp | 30 +++++++++---------- .../fast_nlmeans_multi_denoising_invoker.hpp | 29 +++++++++--------- 2 files changed, 29 insertions(+), 30 deletions(-) diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp index 27a016ae9..c9689cabd 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp @@ -123,31 +123,28 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( // precalc weight for every possible l2 dist between blocks // additional optimization of precalced weights to replace division(averaging) by binary shift - // squared distances are truncated to 24 bits to avoid unreasonable table sizes - // TODO: uses lots of memory and loses precision wtih 16-bit images ???? 
- const size_t TABLE_MAX_BITS = 24; CV_Assert(template_window_size_ <= 46340); // sqrt(INT_MAX) int template_window_size_sq = template_window_size_ * template_window_size_; - almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq) + - std::max(2*pixelInfo::sampleBits(), TABLE_MAX_BITS) - TABLE_MAX_BITS; + almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq); double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq; + const double WEIGHT_THRESHOLD = 0.001; + const size_t ALLOC_CHUNK = 65536; IT max_dist = (IT)pixelInfo::sampleMax() * (IT)pixelInfo::sampleMax() * (IT)pixelInfo::channels; - int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1); - almost_dist2weight_.resize(almost_max_dist); - - const double WEIGHT_THRESHOLD = 0.001; - for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) + int almost_max_dist = 0; + while (true) { - double dist = almost_dist * almost_dist2actual_dist_multiplier; + double dist = almost_max_dist * almost_dist2actual_dist_multiplier; IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * pixelInfo::channels))); + if (weight < WEIGHT_THRESHOLD * fixed_point_mult_ || dist > max_dist) break; - if (weight < WEIGHT_THRESHOLD * fixed_point_mult_) - weight = 0; + if (almost_max_dist >= almost_dist2weight_.size()) + almost_dist2weight_.resize(almost_max_dist + ALLOC_CHUNK); - almost_dist2weight_[almost_dist] = weight; + almost_dist2weight_[almost_max_dist++] = weight; } + almost_dist2weight_.resize(almost_max_dist); CV_Assert(almost_dist2weight_[0] == fixed_point_mult_); // additional optimization init end @@ -161,6 +158,8 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) co int row_from = range.start; int row_to = range.end - 1; + int almost_max_dist = almost_dist2weight_.size(); + // sums of cols anf rows for current pixel p Array2d 
dist_sums(search_window_size_, search_window_size_); @@ -244,7 +243,8 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) co for (int x = 0; x < search_window_size_; x++) { int almostAvgDist = (int)(dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_); - IT weight = almost_dist2weight_[almostAvgDist]; + IT weight = + almostAvgDist < almost_max_dist ? almost_dist2weight_[almostAvgDist] : 0; weights_sum += weight; T p = cur_row_ptr[border_size_ + search_window_x + x]; diff --git a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp index c90249b82..b4bfc0c6c 100644 --- a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp @@ -131,35 +131,31 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( // precalc weight for every possible l2 dist between blocks // additional optimization of precalced weights to replace division(averaging) by binary shift - // squared distances are truncated to 24 bits to avoid unreasonable table sizes - // TODO: uses lots of memory and loses precision wtih 16-bit images ???? 
- const size_t TABLE_MAX_BITS = 24; int template_window_size_sq = template_window_size_ * template_window_size_; almost_template_window_size_sq_bin_shift = 0; while (1 << almost_template_window_size_sq_bin_shift < template_window_size_sq) almost_template_window_size_sq_bin_shift++; - almost_template_window_size_sq_bin_shift += - std::max(2*pixelInfo::sampleBits(), TABLE_MAX_BITS) - TABLE_MAX_BITS; int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift; double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq; + const double WEIGHT_THRESHOLD = 0.001; + const size_t ALLOC_CHUNK = 65536; IT max_dist = (IT)pixelInfo::sampleMax() * (IT)pixelInfo::sampleMax() * (IT)pixelInfo::channels; - int almost_max_dist = (int) (max_dist / almost_dist2actual_dist_multiplier + 1); - almost_dist2weight.resize(almost_max_dist); - - const double WEIGHT_THRESHOLD = 0.001; - for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) + int almost_max_dist = 0; + while (true) { - double dist = almost_dist * almost_dist2actual_dist_multiplier; + double dist = almost_max_dist * almost_dist2actual_dist_multiplier; IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * pixelInfo::channels))); + if (weight < WEIGHT_THRESHOLD * fixed_point_mult_ || dist > max_dist) break; - if (weight < WEIGHT_THRESHOLD * fixed_point_mult_) - weight = 0; + if (almost_max_dist >= almost_dist2weight.size()) + almost_dist2weight.resize(almost_max_dist + ALLOC_CHUNK); - almost_dist2weight[almost_dist] = weight; + almost_dist2weight[almost_max_dist++] = weight; } + almost_dist2weight.resize(almost_max_dist); CV_Assert(almost_dist2weight[0] == fixed_point_mult_); // additional optimization init end @@ -173,6 +169,8 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& rang int row_from = range.start; int row_to = range.end - 1; + int almost_max_dist = almost_dist2weight.size(); + Array3d 
dist_sums(temporal_window_size_, search_window_size_, search_window_size_); // for lazy calc optimization @@ -273,7 +271,8 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& rang { int almostAvgDist = (int)(dist_sums_row[x] >> almost_template_window_size_sq_bin_shift); - IT weight = almost_dist2weight[almostAvgDist]; + IT weight = + almostAvgDist < almost_max_dist ? almost_dist2weight[almostAvgDist] : 0; weights_sum += weight; T p = cur_row_ptr[border_size_ + search_window_x + x]; From 584372bbf297c386ce71357d70b65068551b9466 Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Fri, 13 Feb 2015 04:33:29 +0100 Subject: [PATCH 06/40] Fixed bounds checking --- modules/photo/src/fast_nlmeans_denoising_invoker.hpp | 6 +++--- modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp index c9689cabd..2de50a77b 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp @@ -132,7 +132,7 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( const size_t ALLOC_CHUNK = 65536; IT max_dist = (IT)pixelInfo::sampleMax() * (IT)pixelInfo::sampleMax() * (IT)pixelInfo::channels; - int almost_max_dist = 0; + size_t almost_max_dist = 0; while (true) { double dist = almost_max_dist * almost_dist2actual_dist_multiplier; @@ -158,7 +158,7 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) co int row_from = range.start; int row_to = range.end - 1; - int almost_max_dist = almost_dist2weight_.size(); + size_t almost_max_dist = almost_dist2weight_.size(); // sums of cols anf rows for current pixel p Array2d dist_sums(search_window_size_, search_window_size_); @@ -242,7 +242,7 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) co IT* dist_sums_row = dist_sums.row_ptr(y); for (int x = 0; x < 
search_window_size_; x++) { - int almostAvgDist = (int)(dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_); + size_t almostAvgDist = (size_t)(dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_); IT weight = almostAvgDist < almost_max_dist ? almost_dist2weight_[almostAvgDist] : 0; weights_sum += weight; diff --git a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp index b4bfc0c6c..e0f06c68f 100644 --- a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp @@ -143,7 +143,7 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( const size_t ALLOC_CHUNK = 65536; IT max_dist = (IT)pixelInfo::sampleMax() * (IT)pixelInfo::sampleMax() * (IT)pixelInfo::channels; - int almost_max_dist = 0; + size_t almost_max_dist = 0; while (true) { double dist = almost_max_dist * almost_dist2actual_dist_multiplier; @@ -169,7 +169,7 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& rang int row_from = range.start; int row_to = range.end - 1; - int almost_max_dist = almost_dist2weight.size(); + size_t almost_max_dist = almost_dist2weight.size(); Array3d dist_sums(temporal_window_size_, search_window_size_, search_window_size_); @@ -269,7 +269,7 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& rang for (int x = 0; x < search_window_size_; x++) { - int almostAvgDist = (int)(dist_sums_row[x] >> almost_template_window_size_sq_bin_shift); + size_t almostAvgDist = (size_t)(dist_sums_row[x] >> almost_template_window_size_sq_bin_shift); IT weight = almostAvgDist < almost_max_dist ? almost_dist2weight[almostAvgDist] : 0; From c339720af9cf93ee0c130c55b2a7d2621bca72dc Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Fri, 13 Feb 2015 13:38:37 +0100 Subject: [PATCH 07/40] Preparation for 16-bit colored denoising. Currently not working due to cvtColor not supporting 16-bit Lab conversion. 
--- modules/photo/src/denoising.cpp | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/modules/photo/src/denoising.cpp b/modules/photo/src/denoising.cpp index 0abeefe5b..8f9d1f84a 100644 --- a/modules/photo/src/denoising.cpp +++ b/modules/photo/src/denoising.cpp @@ -80,7 +80,7 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h, break; case CV_16U: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( + FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, h)); break; case CV_16UC2: @@ -95,7 +95,7 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h, break; default: CV_Error(Error::StsBadArg, - "Unsupported image format! Only CV_8UC1, CV_8UC2 and CV_8UC3 are supported"); + "Unsupported image format! Only CV_8U, CV_8UC2, CV_8UC3, CV_16U, CV_16UC2, and CV_16UC3 are supported"); } } @@ -105,9 +105,9 @@ void cv::fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst, { int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); Size src_size = _src.size(); - if (type != CV_8UC3 && type != CV_8UC4) + if (type != CV_8UC3 && type != CV_16UC3 && type != CV_8UC4 && type != CV_16UC4) { - CV_Error(Error::StsBadArg, "Type of input image should be CV_8UC3!"); + CV_Error(Error::StsBadArg, "Type of input image should be CV_8UC3, CV_16UC3, CV_8UC4, or CV_16UC4"); return; } @@ -123,8 +123,8 @@ void cv::fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst, Mat src_lab; cvtColor(src, src_lab, COLOR_LBGR2Lab); - Mat l(src_size, CV_8U); - Mat ab(src_size, CV_8UC2); + Mat l(src_size, CV_MAKE_TYPE(depth, 1)); + Mat ab(src_size, CV_MAKE_TYPE(depth, 2)); Mat l_ab[] = { l, ab }; int from_to[] = { 0,0, 1,1, 2,2 }; mixChannels(&src_lab, 1, l_ab, 2, from_to, 3); @@ -190,7 +190,7 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds { case CV_8U: parallel_for_(cv::Range(0, srcImgs[0].rows), - 
FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, h)); break; @@ -226,7 +226,7 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds break; default: CV_Error(Error::StsBadArg, - "Unsupported matrix format! Only uchar, Vec2b, Vec3b are supported"); + "Unsupported image format! Only CV_8U, CV_8UC2, CV_8UC3, CV_16U, CV_16UC2, and CV_16UC3 are supported"); } } @@ -245,11 +245,12 @@ void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputAr _dst.create(srcImgs[0].size(), srcImgs[0].type()); Mat dst = _dst.getMat(); + int type = srcImgs[0].type(), depth = CV_MAT_DEPTH(type); int src_imgs_size = static_cast(srcImgs.size()); - if (srcImgs[0].type() != CV_8UC3) + if (type != CV_8UC3 && type != CV_16UC3) { - CV_Error(Error::StsBadArg, "Type of input images should be CV_8UC3!"); + CV_Error(Error::StsBadArg, "Type of input images should be CV_8UC3 or CV_16UC3!"); return; } @@ -261,9 +262,9 @@ void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputAr std::vector ab(src_imgs_size); for (int i = 0; i < src_imgs_size; i++) { - src_lab[i] = Mat::zeros(srcImgs[0].size(), CV_8UC3); - l[i] = Mat::zeros(srcImgs[0].size(), CV_8UC1); - ab[i] = Mat::zeros(srcImgs[0].size(), CV_8UC2); + src_lab[i] = Mat::zeros(srcImgs[0].size(), type); + l[i] = Mat::zeros(srcImgs[0].size(), CV_MAKE_TYPE(depth, 1)); + ab[i] = Mat::zeros(srcImgs[0].size(), CV_MAKE_TYPE(depth, 2)); cvtColor(srcImgs[i], src_lab[i], COLOR_LBGR2Lab); Mat l_ab[] = { l[i], ab[i] }; From baf266c29eea897d13ae9ce0f85539a74f264b5f Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Tue, 17 Feb 2015 21:30:52 +0100 Subject: [PATCH 08/40] Changed from sum of squared differences to sum of abs differences --- .../src/fast_nlmeans_denoising_invoker.hpp | 5 ++- ...fast_nlmeans_denoising_invoker_commons.hpp | 32 +++---------------- 
.../fast_nlmeans_multi_denoising_invoker.hpp | 5 ++- 3 files changed, 8 insertions(+), 34 deletions(-) diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp index 2de50a77b..cbf9d259f 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp @@ -130,13 +130,12 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( const double WEIGHT_THRESHOLD = 0.001; const size_t ALLOC_CHUNK = 65536; - IT max_dist = - (IT)pixelInfo::sampleMax() * (IT)pixelInfo::sampleMax() * (IT)pixelInfo::channels; + IT max_dist = (IT)pixelInfo::sampleMax() * (IT)pixelInfo::channels; size_t almost_max_dist = 0; while (true) { double dist = almost_max_dist * almost_dist2actual_dist_multiplier; - IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * pixelInfo::channels))); + IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist*dist / (h * h * pixelInfo::channels))); if (weight < WEIGHT_THRESHOLD * fixed_point_mult_ || dist > max_dist) break; if (almost_max_dist >= almost_dist2weight_.size()) diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp index 0a8713b91..4ca63d652 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp @@ -85,7 +85,7 @@ template struct calcDist_ { static inline IT f(const T a, const T b) { - return (IT)(a-b) * (IT)(a-b); + return std::abs((IT)(a-b)); } }; @@ -93,7 +93,7 @@ template struct calcDist_, IT> { static inline IT f(const Vec a, const Vec b) { - return (IT)(a[0]-b[0])*(IT)(a[0]-b[0]) + (IT)(a[1]-b[1])*(IT)(a[1]-b[1]); + return std::abs((IT)(a[0]-b[0])) + std::abs((IT)(a[1]-b[1])); } }; @@ -101,10 +101,7 @@ template struct calcDist_, IT> { static inline IT f(const Vec a, const Vec b) { - return - (IT)(a[0]-b[0])*(IT)(a[0]-b[0]) + - 
(IT)(a[1]-b[1])*(IT)(a[1]-b[1]) + - (IT)(a[2]-b[2])*(IT)(a[2]-b[2]); + return std::abs((IT)(a[0]-b[0])) + std::abs((IT)(a[1]-b[1])) + std::abs((IT)(a[2]-b[2])); } }; @@ -121,31 +118,10 @@ static inline IT calcDist(const Mat& m, int i1, int j1, int i2, int j2) return calcDist(a,b); } -template struct calcUpDownDist_ -{ - static inline IT f(T a_up, T a_down, T b_up, T b_down) - { - IT A = a_down - b_down; - IT B = a_up - b_up; - return (A-B)*(A+B); - } -}; - -template struct calcUpDownDist_, IT> -{ -private: - typedef Vec T; -public: - static inline IT f(T a_up, T a_down, T b_up, T b_down) - { - return calcDist(a_down, b_down) - calcDist(a_up, b_up); - } -}; - template static inline IT calcUpDownDist(T a_up, T a_down, T b_up, T b_down) { - return calcUpDownDist_::f(a_up, a_down, b_up, b_down); + return calcDist(a_down, b_down) - calcDist(a_up, b_up); }; template struct incWithWeight_ diff --git a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp index e0f06c68f..f12a0ef50 100644 --- a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp @@ -141,13 +141,12 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( const double WEIGHT_THRESHOLD = 0.001; const size_t ALLOC_CHUNK = 65536; - IT max_dist = - (IT)pixelInfo::sampleMax() * (IT)pixelInfo::sampleMax() * (IT)pixelInfo::channels; + IT max_dist = (IT)pixelInfo::sampleMax() * (IT)pixelInfo::channels; size_t almost_max_dist = 0; while (true) { double dist = almost_max_dist * almost_dist2actual_dist_multiplier; - IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * pixelInfo::channels))); + IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist*dist / (h * h * pixelInfo::channels))); if (weight < WEIGHT_THRESHOLD * fixed_point_mult_ || dist > max_dist) break; if (almost_max_dist >= almost_dist2weight.size()) From 
e647b7c7e8a15765f7a18ed496fd2313338b900f Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Tue, 17 Feb 2015 23:08:36 +0100 Subject: [PATCH 09/40] Calculating almost_dist2weight at full size to avoid bounds checking --- .../src/fast_nlmeans_denoising_invoker.hpp | 31 ++++++++----------- .../fast_nlmeans_multi_denoising_invoker.hpp | 31 ++++++++----------- 2 files changed, 26 insertions(+), 36 deletions(-) diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp index cbf9d259f..a641c990e 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp @@ -128,22 +128,20 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq); double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq; - const double WEIGHT_THRESHOLD = 0.001; - const size_t ALLOC_CHUNK = 65536; IT max_dist = (IT)pixelInfo::sampleMax() * (IT)pixelInfo::channels; - size_t almost_max_dist = 0; - while (true) - { - double dist = almost_max_dist * almost_dist2actual_dist_multiplier; - IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist*dist / (h * h * pixelInfo::channels))); - if (weight < WEIGHT_THRESHOLD * fixed_point_mult_ || dist > max_dist) break; - - if (almost_max_dist >= almost_dist2weight_.size()) - almost_dist2weight_.resize(almost_max_dist + ALLOC_CHUNK); - - almost_dist2weight_[almost_max_dist++] = weight; - } + size_t almost_max_dist = (size_t)(max_dist / almost_dist2actual_dist_multiplier + 1); almost_dist2weight_.resize(almost_max_dist); + + const double WEIGHT_THRESHOLD = 0.001; + for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) + { + double dist = almost_dist * almost_dist2actual_dist_multiplier; + IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist*dist / (h * h * 
pixelInfo::channels))); + if (weight < WEIGHT_THRESHOLD * fixed_point_mult_) + weight = 0; + + almost_dist2weight_[almost_dist] = weight; + } CV_Assert(almost_dist2weight_[0] == fixed_point_mult_); // additional optimization init end @@ -157,8 +155,6 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) co int row_from = range.start; int row_to = range.end - 1; - size_t almost_max_dist = almost_dist2weight_.size(); - // sums of cols anf rows for current pixel p Array2d dist_sums(search_window_size_, search_window_size_); @@ -242,8 +238,7 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) co for (int x = 0; x < search_window_size_; x++) { size_t almostAvgDist = (size_t)(dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_); - IT weight = - almostAvgDist < almost_max_dist ? almost_dist2weight_[almostAvgDist] : 0; + IT weight = almost_dist2weight_[almostAvgDist]; weights_sum += weight; T p = cur_row_ptr[border_size_ + search_window_x + x]; diff --git a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp index f12a0ef50..808b01f50 100644 --- a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp @@ -139,22 +139,20 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift; double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq; - const double WEIGHT_THRESHOLD = 0.001; - const size_t ALLOC_CHUNK = 65536; IT max_dist = (IT)pixelInfo::sampleMax() * (IT)pixelInfo::channels; - size_t almost_max_dist = 0; - while (true) - { - double dist = almost_max_dist * almost_dist2actual_dist_multiplier; - IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist*dist / (h * h * pixelInfo::channels))); - if (weight < WEIGHT_THRESHOLD * fixed_point_mult_ || dist > 
max_dist) break; - - if (almost_max_dist >= almost_dist2weight.size()) - almost_dist2weight.resize(almost_max_dist + ALLOC_CHUNK); - - almost_dist2weight[almost_max_dist++] = weight; - } + int almost_max_dist = (int) (max_dist / almost_dist2actual_dist_multiplier + 1); almost_dist2weight.resize(almost_max_dist); + + const double WEIGHT_THRESHOLD = 0.001; + for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) + { + double dist = almost_dist * almost_dist2actual_dist_multiplier; + IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist*dist / (h * h * pixelInfo::channels))); + if (weight < WEIGHT_THRESHOLD * fixed_point_mult_) + weight = 0; + + almost_dist2weight[almost_dist] = weight; + } CV_Assert(almost_dist2weight[0] == fixed_point_mult_); // additional optimization init end @@ -168,8 +166,6 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& rang int row_from = range.start; int row_to = range.end - 1; - size_t almost_max_dist = almost_dist2weight.size(); - Array3d dist_sums(temporal_window_size_, search_window_size_, search_window_size_); // for lazy calc optimization @@ -270,8 +266,7 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& rang { size_t almostAvgDist = (size_t)(dist_sums_row[x] >> almost_template_window_size_sq_bin_shift); - IT weight = - almostAvgDist < almost_max_dist ? almost_dist2weight[almostAvgDist] : 0; + IT weight = almost_dist2weight[almostAvgDist]; weights_sum += weight; T p = cur_row_ptr[border_size_ + search_window_x + x]; From 8e7aff44869439f04ef9c0f3ae43b7c6f143c715 Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Wed, 18 Feb 2015 14:59:52 +0100 Subject: [PATCH 10/40] Changed fastNlMeansDenoising and fastNlMeansDenoisingMulti back to sum of sq distances. 
Moved sq sum of abs distances to fastNlMeansDenoisingAbs and fastNlMeansDenoisingMultiAbs --- modules/photo/include/opencv2/photo.hpp | 56 ++++++ modules/photo/src/denoising.cpp | 102 +++++++++-- .../src/fast_nlmeans_denoising_invoker.hpp | 28 +-- ...fast_nlmeans_denoising_invoker_commons.hpp | 159 +++++++++++++++--- .../fast_nlmeans_multi_denoising_invoker.hpp | 28 +-- 5 files changed, 301 insertions(+), 72 deletions(-) diff --git a/modules/photo/include/opencv2/photo.hpp b/modules/photo/include/opencv2/photo.hpp index 2d1087e89..c25a35e6d 100644 --- a/modules/photo/include/opencv2/photo.hpp +++ b/modules/photo/include/opencv2/photo.hpp @@ -138,6 +138,31 @@ parameter. CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst, float h = 3, int templateWindowSize = 7, int searchWindowSize = 21); +/** @brief Perform image denoising using Non-local Means Denoising +algorithm +with several computational optimizations. Noise expected to be a +gaussian white noise. Uses squared sum of absolute value distances +instead of sum of squared distances for weight calculation + +@param src Input 8-bit or 16-bit 1-channel, 2-channel or 3-channel image. +@param dst Output image with the same size and type as src . +@param templateWindowSize Size in pixels of the template patch that is used to compute weights. +Should be odd. Recommended value 7 pixels +@param searchWindowSize Size in pixels of the window that is used to compute weighted average for +given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater +denoising time. Recommended value 21 pixels +@param h Parameter regulating filter strength. Big h value perfectly removes noise but also +removes image details, smaller h value preserves details but also preserves some noise + +This function expected to be applied to grayscale images. For colored images look at +fastNlMeansDenoisingColored. 
Advanced usage of this functions can be manual denoising of colored +image in different colorspaces. Such approach is used in fastNlMeansDenoisingColored by converting +image to CIELAB colorspace and then separately denoise L and AB components with different h +parameter. + */ +CV_EXPORTS_W void fastNlMeansDenoisingAbs( InputArray src, OutputArray dst, float h = 3, + int templateWindowSize = 7, int searchWindowSize = 21); + /** @brief Modification of fastNlMeansDenoising function for colored images @param src Input 8-bit 3-channel image. @@ -186,6 +211,37 @@ CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputA int imgToDenoiseIndex, int temporalWindowSize, float h = 3, int templateWindowSize = 7, int searchWindowSize = 21); +/** @brief Modification of fastNlMeansDenoising function for images +sequence where consequtive images have been captured in small period +of time. For example video. This version of the function is for +grayscale images or for manual manipulation with colorspaces. For more +details see +. Uses +squared sum of absolute value distances instead of sum of squared +distances for weight calculation + +@param srcImgs Input 8-bit or 16-bit 1-channel, 2-channel or 3-channel +images sequence. All images should +have the same type and size. +@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence +@param temporalWindowSize Number of surrounding images to use for target image denoising. Should +be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to +imgToDenoiseIndex - temporalWindowSize / 2 from srcImgs will be used to denoise +srcImgs[imgToDenoiseIndex] image. +@param dst Output image with the same size and type as srcImgs images. +@param templateWindowSize Size in pixels of the template patch that is used to compute weights. +Should be odd. Recommended value 7 pixels +@param searchWindowSize Size in pixels of the window that is used to compute weighted average for +given pixel. Should be odd. 
Affect performance linearly: greater searchWindowsSize - greater +denoising time. Recommended value 21 pixels +@param h Parameter regulating filter strength for luminance component. Bigger h value perfectly +removes noise but also removes image details, smaller h value preserves details but also preserves +some noise + */ +CV_EXPORTS_W void fastNlMeansDenoisingMultiAbs( InputArrayOfArrays srcImgs, OutputArray dst, + int imgToDenoiseIndex, int temporalWindowSize, + float h = 3, int templateWindowSize = 7, int searchWindowSize = 21); + /** @brief Modification of fastNlMeansDenoisingMulti function for colored images sequences @param srcImgs Input 8-bit 3-channel images sequence. All images should have the same type and diff --git a/modules/photo/src/denoising.cpp b/modules/photo/src/denoising.cpp index 8f9d1f84a..52065b5f6 100644 --- a/modules/photo/src/denoising.cpp +++ b/modules/photo/src/denoising.cpp @@ -65,32 +65,62 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h, switch (src.type()) { case CV_8U: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( + FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC2: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( + FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC3: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, h)); + break; + default: + CV_Error(Error::StsBadArg, + "Unsupported image format! 
Only CV_8U, CV_8UC2, and CV_8UC3 are supported"); + } +} + +void cv::fastNlMeansDenoisingAbs( InputArray _src, OutputArray _dst, float h, + int templateWindowSize, int searchWindowSize) +{ + Size src_size = _src.size(); + Mat src = _src.getMat(); + _dst.create(src_size, src.type()); + Mat dst = _dst.getMat(); + + switch (src.type()) { + case CV_8U: + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_8UC2: + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_8UC3: + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, h)); break; case CV_16U: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( + FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, h)); break; case CV_16UC2: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker, int64, uint64>( + FastNlMeansDenoisingInvoker, int64, uint64, DistAbs>( src, dst, templateWindowSize, searchWindowSize, h)); break; case CV_16UC3: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker, int64, uint64>( + FastNlMeansDenoisingInvoker, int64, uint64, DistAbs>( src, dst, templateWindowSize, searchWindowSize, h)); break; default: @@ -105,9 +135,9 @@ void cv::fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst, { int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); Size src_size = _src.size(); - if (type != CV_8UC3 && type != CV_16UC3 && type != CV_8UC4 && type != CV_16UC4) + if (type != CV_8UC3 && type != CV_8UC4) { - CV_Error(Error::StsBadArg, "Type of input image should be CV_8UC3, CV_16UC3, CV_8UC4, or CV_16UC4"); + CV_Error(Error::StsBadArg, "Type of input image should be CV_8UC3 or CV_8UC4!"); return; } @@ -190,37 +220,77 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays 
_srcImgs, OutputArray _ds { case CV_8U: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC2: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC3: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, h)); + break; + default: + CV_Error(Error::StsBadArg, + "Unsupported image format! Only CV_8U, CV_8UC2, and CV_8UC3 are supported"); + } +} + +void cv::fastNlMeansDenoisingMultiAbs( InputArrayOfArrays _srcImgs, OutputArray _dst, + int imgToDenoiseIndex, int temporalWindowSize, + float h, int templateWindowSize, int searchWindowSize) +{ + std::vector srcImgs; + _srcImgs.getMatVector(srcImgs); + + fastNlMeansDenoisingMultiCheckPreconditions( + srcImgs, imgToDenoiseIndex, + temporalWindowSize, templateWindowSize, searchWindowSize); + + _dst.create(srcImgs[0].size(), srcImgs[0].type()); + Mat dst = _dst.getMat(); + + switch (srcImgs[0].type()) + { + case CV_8U: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_8UC2: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_8UC3: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, h)); break; case 
CV_16U: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, h)); break; case CV_16UC2: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker, int64, uint64>( + FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs>( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, h)); break; case CV_16UC3: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker, int64, uint64>( + FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs>( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, h)); break; @@ -248,9 +318,9 @@ void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputAr int type = srcImgs[0].type(), depth = CV_MAT_DEPTH(type); int src_imgs_size = static_cast(srcImgs.size()); - if (type != CV_8UC3 && type != CV_16UC3) + if (type != CV_8UC3) { - CV_Error(Error::StsBadArg, "Type of input images should be CV_8UC3 or CV_16UC3!"); + CV_Error(Error::StsBadArg, "Type of input images should be CV_8UC3!"); return; } diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp index a641c990e..468fa82f7 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp @@ -50,7 +50,7 @@ using namespace cv; -template +template struct FastNlMeansDenoisingInvoker : public ParallelLoopBody { @@ -99,8 +99,8 @@ inline int getNearestPowerOf2(int value) return p; } -template -FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( +template +FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( const Mat& src, Mat& dst, int template_window_size, int search_window_size, @@ -128,7 +128,7 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( 
almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq); double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq; - IT max_dist = (IT)pixelInfo::sampleMax() * (IT)pixelInfo::channels; + IT max_dist = D::template maxDist(); size_t almost_max_dist = (size_t)(max_dist / almost_dist2actual_dist_multiplier + 1); almost_dist2weight_.resize(almost_max_dist); @@ -136,7 +136,7 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) { double dist = almost_dist * almost_dist2actual_dist_multiplier; - IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist*dist / (h * h * pixelInfo::channels))); + IT weight = (IT)round(fixed_point_mult_ * D::template calcWeight(dist, h)); if (weight < WEIGHT_THRESHOLD * fixed_point_mult_) weight = 0; @@ -149,8 +149,8 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( dst_ = Mat::zeros(src_.size(), src_.type()); } -template -void FastNlMeansDenoisingInvoker::operator() (const Range& range) const +template +void FastNlMeansDenoisingInvoker::operator() (const Range& range) const { int row_from = range.start; int row_to = range.end - 1; @@ -215,7 +215,7 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) co dist_sums_row[x] -= col_dist_sums_row[x]; int bx = start_bx + x; - col_dist_sums_row[x] = up_col_dist_sums_row[x] + calcUpDownDist(a_up, a_down, b_up_ptr[bx], b_down_ptr[bx]); + col_dist_sums_row[x] = up_col_dist_sums_row[x] + D::template calcUpDownDist(a_up, a_down, b_up_ptr[bx], b_down_ptr[bx]); dist_sums_row[x] += col_dist_sums_row[x]; up_col_dist_sums_row[x] = col_dist_sums_row[x]; @@ -254,8 +254,8 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) co } } -template -inline void FastNlMeansDenoisingInvoker::calcDistSumsForFirstElementInRow( +template +inline void 
FastNlMeansDenoisingInvoker::calcDistSumsForFirstElementInRow( int i, Array2d& dist_sums, Array3d& col_dist_sums, @@ -276,7 +276,7 @@ inline void FastNlMeansDenoisingInvoker::calcDistSumsForFirstElement for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) for (int tx = -template_window_half_size_; tx <= template_window_half_size_; tx++) { - int dist = calcDist(extended_src_, + int dist = D::template calcDist(extended_src_, border_size_ + i + ty, border_size_ + j + tx, border_size_ + start_y + ty, border_size_ + start_x + tx); @@ -288,8 +288,8 @@ inline void FastNlMeansDenoisingInvoker::calcDistSumsForFirstElement } } -template -inline void FastNlMeansDenoisingInvoker::calcDistSumsForElementInFirstRow( +template +inline void FastNlMeansDenoisingInvoker::calcDistSumsForElementInFirstRow( int i, int j, int first_col_num, Array2d& dist_sums, Array3d& col_dist_sums, @@ -312,7 +312,7 @@ inline void FastNlMeansDenoisingInvoker::calcDistSumsForElementInFir int by = start_by + y; int bx = start_bx + x; for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) - col_dist_sums[new_last_col_num][y][x] += calcDist(extended_src_, ay + ty, ax, by + ty, bx); + col_dist_sums[new_last_col_num][y][x] += D::template calcDist(extended_src_, ay + ty, ax, by + ty, bx); dist_sums[y][x] += col_dist_sums[new_last_col_num][y][x]; up_col_dist_sums[j][y][x] = col_dist_sums[new_last_col_num][y][x]; diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp index 4ca63d652..d55d93ce7 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp @@ -81,47 +81,150 @@ template struct pixelInfo: public pixelInfo_ } }; -template struct calcDist_ +class DistAbs { - static inline IT f(const T a, const T b) + template struct calcDist_ { - return std::abs((IT)(a-b)); + static inline IT f(const T 
a, const T b) + { + return std::abs((IT)(a-b)); + } + }; + + template struct calcDist_, IT> + { + static inline IT f(const Vec a, const Vec b) + { + return std::abs((IT)(a[0]-b[0])) + std::abs((IT)(a[1]-b[1])); + } + }; + + template struct calcDist_, IT> + { + static inline IT f(const Vec a, const Vec b) + { + return + std::abs((IT)(a[0]-b[0])) + + std::abs((IT)(a[1]-b[1])) + + std::abs((IT)(a[2]-b[2])); + } + }; + +public: + template static inline IT calcDist(const T a, const T b) + { + return calcDist_::f(a, b); + } + + template + static inline IT calcDist(const Mat& m, int i1, int j1, int i2, int j2) + { + const T a = m.at(i1, j1); + const T b = m.at(i2, j2); + return calcDist(a,b); + } + + template + static inline IT calcUpDownDist(T a_up, T a_down, T b_up, T b_down) + { + return calcDist(a_down, b_down) - calcDist(a_up, b_up); + }; + + template + static double calcWeight(double dist, double h) + { + return std::exp(-dist*dist / (h * h * pixelInfo::channels)); + } + + template + static double maxDist() + { + return (IT)pixelInfo::sampleMax() * (IT)pixelInfo::channels; } }; -template struct calcDist_, IT> +class DistSquared { - static inline IT f(const Vec a, const Vec b) + template struct calcDist_ { - return std::abs((IT)(a[0]-b[0])) + std::abs((IT)(a[1]-b[1])); - } -}; + static inline IT f(const T a, const T b) + { + return (IT)(a-b) * (IT)(a-b); + } + }; -template struct calcDist_, IT> -{ - static inline IT f(const Vec a, const Vec b) + template struct calcDist_, IT> { - return std::abs((IT)(a[0]-b[0])) + std::abs((IT)(a[1]-b[1])) + std::abs((IT)(a[2]-b[2])); + static inline IT f(const Vec a, const Vec b) + { + return (IT)(a[0]-b[0])*(IT)(a[0]-b[0]) + (IT)(a[1]-b[1])*(IT)(a[1]-b[1]); + } + }; + + template struct calcDist_, IT> + { + static inline IT f(const Vec a, const Vec b) + { + return + (IT)(a[0]-b[0])*(IT)(a[0]-b[0]) + + (IT)(a[1]-b[1])*(IT)(a[1]-b[1]) + + (IT)(a[2]-b[2])*(IT)(a[2]-b[2]); + } + }; + + template struct calcUpDownDist_ + { + static inline 
IT f(T a_up, T a_down, T b_up, T b_down) + { + IT A = a_down - b_down; + IT B = a_up - b_up; + return (A-B)*(A+B); + } + }; + + template struct calcUpDownDist_, IT> + { + private: + typedef Vec T; + public: + static inline IT f(T a_up, T a_down, T b_up, T b_down) + { + return calcDist(a_down, b_down) - calcDist(a_up, b_up); + } + }; + +public: + template static inline IT calcDist(const T a, const T b) + { + return calcDist_::f(a, b); } -}; -template static inline IT calcDist(const T a, const T b) -{ - return calcDist_::f(a, b); -} + template + static inline IT calcDist(const Mat& m, int i1, int j1, int i2, int j2) + { + const T a = m.at(i1, j1); + const T b = m.at(i2, j2); + return calcDist(a,b); + } -template -static inline IT calcDist(const Mat& m, int i1, int j1, int i2, int j2) -{ - const T a = m.at(i1, j1); - const T b = m.at(i2, j2); - return calcDist(a,b); -} + template + static inline IT calcUpDownDist(T a_up, T a_down, T b_up, T b_down) + { + return calcUpDownDist_::f(a_up, a_down, b_up, b_down); + }; -template -static inline IT calcUpDownDist(T a_up, T a_down, T b_up, T b_down) -{ - return calcDist(a_down, b_down) - calcDist(a_up, b_up); + template + static double calcWeight(double dist, double h) + { + return std::exp(-dist / (h * h * pixelInfo::channels)); + } + + template + static double maxDist() + { + return (IT)pixelInfo::sampleMax() * (IT)pixelInfo::sampleMax() * + (IT)pixelInfo::channels; + } }; template struct incWithWeight_ diff --git a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp index 808b01f50..0a2bdd739 100644 --- a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp @@ -50,7 +50,7 @@ using namespace cv; -template +template struct FastNlMeansMultiDenoisingInvoker : ParallelLoopBody { @@ -94,8 +94,8 @@ private: Array4d& up_col_dist_sums) const; }; -template 
-FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( +template +FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( const std::vector& srcImgs, int imgToDenoiseIndex, int temporalWindowSize, @@ -139,7 +139,7 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift; double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq; - IT max_dist = (IT)pixelInfo::sampleMax() * (IT)pixelInfo::channels; + IT max_dist = D::template maxDist(); int almost_max_dist = (int) (max_dist / almost_dist2actual_dist_multiplier + 1); almost_dist2weight.resize(almost_max_dist); @@ -147,7 +147,7 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) { double dist = almost_dist * almost_dist2actual_dist_multiplier; - IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist*dist / (h * h * pixelInfo::channels))); + IT weight = (IT)round(fixed_point_mult_ * D::template calcWeight(dist, h)); if (weight < WEIGHT_THRESHOLD * fixed_point_mult_) weight = 0; @@ -160,8 +160,8 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( dst_ = Mat::zeros(srcImgs[0].size(), srcImgs[0].type()); } -template -void FastNlMeansMultiDenoisingInvoker::operator() (const Range& range) const +template +void FastNlMeansMultiDenoisingInvoker::operator() (const Range& range) const { int row_from = range.start; int row_to = range.end - 1; @@ -234,7 +234,7 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& rang dist_sums_row[x] -= col_dist_sums_row[x]; col_dist_sums_row[x] = up_col_dist_sums_row[x] + - calcUpDownDist(a_up, a_down, b_up_ptr[start_bx + x], b_down_ptr[start_bx + x]); + D::template calcUpDownDist(a_up, a_down, b_up_ptr[start_bx + x], b_down_ptr[start_bx + x]); dist_sums_row[x] += col_dist_sums_row[x]; 
up_col_dist_sums_row[x] = col_dist_sums_row[x]; @@ -284,8 +284,8 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& rang } } -template -inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForFirstElementInRow( +template +inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForFirstElementInRow( int i, Array3d& dist_sums, Array4d& col_dist_sums, Array4d& up_col_dist_sums) const { int j = 0; @@ -310,7 +310,7 @@ inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForFirstEl { for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) { - IT dist = calcDist( + IT dist = D::template calcDist( main_extended_src_.at(border_size_ + i + ty, border_size_ + j + tx), cur_extended_src.at(border_size_ + start_y + ty, border_size_ + start_x + tx)); @@ -325,8 +325,8 @@ inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForFirstEl } } -template -inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForElementInFirstRow( +template +inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForElementInFirstRow( int i, int j, int first_col_num, Array3d& dist_sums, Array4d& col_dist_sums, Array4d& up_col_dist_sums) const { @@ -353,7 +353,7 @@ inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForElement IT* col_dist_sums_ptr = &col_dist_sums[new_last_col_num][d][y][x]; for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) { - *col_dist_sums_ptr += calcDist( + *col_dist_sums_ptr += D::template calcDist( main_extended_src_.at(ay + ty, ax), cur_extended_src.at(by + ty, bx)); } From 0fdb95e195c499a6aa9a02d5a49abed9709b4258 Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Sun, 1 Mar 2015 20:59:34 +0100 Subject: [PATCH 11/40] Refactoring and addition of CV_8UC3 to ocl_fastNlMeansDenoising --- .../src/fast_nlmeans_denoising_opencl.hpp | 44 +++++++++--- modules/photo/src/opencl/nlmeans.cl | 72 ++++++++++++------- 2 files changed, 78 insertions(+), 38 deletions(-) diff --git 
a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp index 1cdd8fa49..cd7dde385 100644 --- a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp @@ -70,11 +70,11 @@ static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindow static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, int templateWindowSize, int searchWindowSize) { - int type = _src.type(), cn = CV_MAT_CN(type); + int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); int ctaSize = ocl::Device::getDefault().isIntel() ? CTA_SIZE_INTEL : CTA_SIZE_DEFAULT; Size size = _src.size(); - if ( type != CV_8UC1 && type != CV_8UC2 && type != CV_8UC4 ) + if ( type != CV_8UC1 && type != CV_8UC2 && type != CV_8UC3 ) return false; int templateWindowHalfWize = templateWindowSize / 2; @@ -86,13 +86,15 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, char cvt[2][40]; String opts = format("-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d" - " -D uchar_t=%s -D int_t=%s -D BLOCK_COLS=%d -D BLOCK_ROWS=%d" + " -D sample_t=%s -D pixel_t=%s -D int_t=%s" + " -D BLOCK_COLS=%d -D BLOCK_ROWS=%d" " -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d" - " -D convert_int_t=%s -D cn=%d -D convert_uchar_t=%s", - templateWindowSize, searchWindowSize, ocl::typeToStr(type), - ocl::typeToStr(CV_32SC(cn)), BLOCK_COLS, BLOCK_ROWS, ctaSize, - templateWindowHalfWize, searchWindowHalfSize, - ocl::convertTypeStr(CV_8U, CV_32S, cn, cvt[0]), cn, + " -D convert_int_t=%s -D cn=%d -D convert_pixel_t=%s", + templateWindowSize, searchWindowSize, + ocl::typeToStr(depth), ocl::typeToStr(type), ocl::typeToStr(CV_32SC(cn)), + BLOCK_COLS, BLOCK_ROWS, + ctaSize, templateWindowHalfWize, searchWindowHalfSize, + ocl::convertTypeStr(CV_8U, CV_32S, cn, cvt[0]), type == CV_8UC3 ? 
4 : cn, ocl::convertTypeStr(CV_32S, CV_8U, cn, cvt[1])); ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts); @@ -107,10 +109,22 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, UMat srcex; int borderSize = searchWindowHalfSize + templateWindowHalfWize; - copyMakeBorder(_src, srcex, borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT); + if (type == CV_8UC3) { + Mat src_rgb = _src.getMat(), src_rgba(size, CV_8UC4); + int from_to[] = { 0,0, 1,1, 2,2 }; + mixChannels(&src_rgb, 1, &src_rgba, 1, from_to, 3); + copyMakeBorder(src_rgba, srcex, + borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT); + } + else + copyMakeBorder(_src, srcex, borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT); _dst.create(size, type); - UMat dst = _dst.getUMat(); + UMat dst; + if (type == CV_8UC3) + dst.create(size, CV_8UC4); + else + dst = _dst.getUMat(); int searchWindowSizeSq = searchWindowSize * searchWindowSize; Size upColSumSize(size.width, searchWindowSizeSq * nblocksy); @@ -123,7 +137,15 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, ocl::KernelArg::PtrReadOnly(buffer), almostTemplateWindowSizeSqBinShift); size_t globalsize[2] = { nblocksx * ctaSize, nblocksy }, localsize[2] = { ctaSize, 1 }; - return k.run(2, globalsize, localsize, false); + if (!k.run(2, globalsize, localsize, false)) return false; + + if (type == CV_8UC3) { + Mat dst_rgba = dst.getMat(ACCESS_READ), dst_rgb = _dst.getMat(); + int from_to[] = { 0,0, 1,1, 2,2 }; + mixChannels(&dst_rgba, 1, &dst_rgb, 1, from_to, 3); + } + + return true; } static bool ocl_fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst, diff --git a/modules/photo/src/opencl/nlmeans.cl b/modules/photo/src/opencl/nlmeans.cl index af3fb1f9b..c48adda0b 100644 --- a/modules/photo/src/opencl/nlmeans.cl +++ b/modules/photo/src/opencl/nlmeans.cl @@ -29,8 +29,11 @@ __kernel void calcAlmostDist2Weight(__global int * 
almostDist2Weight, int almost if (almostDist < almostMaxDist) { FT dist = almostDist * almostDist2ActualDistMultiplier; +#ifdef ABS + int weight = convert_int_sat_rte(fixedPointMult * exp(-dist*dist * den)); +#else int weight = convert_int_sat_rte(fixedPointMult * exp(-dist * den)); - +#endif if (weight < WEIGHT_THRESHOLD * fixedPointMult) weight = 0; @@ -44,21 +47,33 @@ __kernel void calcAlmostDist2Weight(__global int * almostDist2Weight, int almost #define SEARCH_SIZE_SQ (SEARCH_SIZE * SEARCH_SIZE) -inline int calcDist(uchar_t a, uchar_t b) +inline int calcDist(pixel_t a, pixel_t b) { +#ifdef ABS + int_t retval = convert_int_t(abs_diff(a, b)); +#else int_t diff = convert_int_t(a) - convert_int_t(b); int_t retval = diff * diff; +#endif #if cn == 1 return retval; #elif cn == 2 return retval.x + retval.y; +#elif cn == 3 || cn == 4 /* A is ignored */ + return retval.x + retval.y + retval.z; #else -#error "cn should be either 1 or 2" +#error "cn should be either 1, 2, 3 or 4" #endif } -inline int calcDistUpDown(uchar_t down_value, uchar_t down_value_t, uchar_t up_value, uchar_t up_value_t) +#ifdef ABS +inline int calcDistUpDown(pixel_t down_value, pixel_t down_value_t, pixel_t up_value, pixel_t up_value_t) +{ + return calcDist(down_value, down_value_t) - calcDist(up_value, up_value_t); +} +#else +inline int calcDistUpDown(pixel_t down_value, pixel_t down_value_t, pixel_t up_value, pixel_t up_value_t) { int_t A = convert_int_t(down_value) - convert_int_t(down_value_t); int_t B = convert_int_t(up_value) - convert_int_t(up_value_t); @@ -68,14 +83,17 @@ inline int calcDistUpDown(uchar_t down_value, uchar_t down_value_t, uchar_t up_v return retval; #elif cn == 2 return retval.x + retval.y; +#elif cn == 3 || cn == 4 /* A is ignored */ + return retval.x + retval.y + retval.z; #else -#error "cn should be either 1 or 2" +#error "cn should be either 1, 2, 3 or 4" #endif } +#endif #define COND if (x == 0 && y == 0) -inline void calcFirstElementInRow(__global const uchar * src, 
int src_step, int src_offset, +inline void calcFirstElementInRow(__global const sample_t * src, int src_step, int src_offset, __local int * dists, int y, int x, int id, __global int * col_dists, __global int * up_col_dists) { @@ -87,9 +105,9 @@ inline void calcFirstElementInRow(__global const uchar * src, int src_step, int { int dist = 0, value; - __global const uchar_t * src_template = (__global const uchar_t *)(src + + __global const pixel_t * src_template = (__global const pixel_t *)(src + mad24(sy + i / SEARCH_SIZE, src_step, mad24(cn, sx + i % SEARCH_SIZE, src_offset))); - __global const uchar_t * src_current = (__global const uchar_t *)(src + mad24(y, src_step, mad24(cn, x, src_offset))); + __global const pixel_t * src_current = (__global const pixel_t *)(src + mad24(y, src_step, mad24(cn, x, src_offset))); __global int * col_dists_current = col_dists + i * TEMPLATE_SIZE; #pragma unroll @@ -107,8 +125,8 @@ inline void calcFirstElementInRow(__global const uchar * src, int src_step, int dist += value; } - src_current = (__global const uchar_t *)((__global const uchar *)src_current + src_step); - src_template = (__global const uchar_t *)((__global const uchar *)src_template + src_step); + src_current = (__global const pixel_t *)((__global const sample_t *)src_current + src_step); + src_template = (__global const pixel_t *)((__global const sample_t *)src_template + src_step); } #pragma unroll @@ -120,7 +138,7 @@ inline void calcFirstElementInRow(__global const uchar * src, int src_step, int } } -inline void calcElementInFirstRow(__global const uchar * src, int src_step, int src_offset, +inline void calcElementInFirstRow(__global const sample_t * src, int src_step, int src_offset, __local int * dists, int y, int x0, int x, int id, int first, __global int * col_dists, __global int * up_col_dists) { @@ -130,8 +148,8 @@ inline void calcElementInFirstRow(__global const uchar * src, int src_step, int for (int i = id; i < SEARCH_SIZE_SQ; i += CTA_SIZE) { - __global 
const uchar_t * src_current = (__global const uchar_t *)(src + mad24(y, src_step, mad24(cn, x, src_offset))); - __global const uchar_t * src_template = (__global const uchar_t *)(src + + __global const pixel_t * src_current = (__global const pixel_t *)(src + mad24(y, src_step, mad24(cn, x, src_offset))); + __global const pixel_t * src_template = (__global const pixel_t *)(src + mad24(sy + i / SEARCH_SIZE, src_step, mad24(cn, sx + i % SEARCH_SIZE, src_offset))); __global int * col_dists_current = col_dists + TEMPLATE_SIZE * i; @@ -142,8 +160,8 @@ inline void calcElementInFirstRow(__global const uchar * src, int src_step, int { col_dist += calcDist(src_current[0], src_template[0]); - src_current = (__global const uchar_t *)((__global const uchar *)src_current + src_step); - src_template = (__global const uchar_t *)((__global const uchar *)src_template + src_step); + src_current = (__global const pixel_t *)((__global const sample_t *)src_current + src_step); + src_template = (__global const pixel_t *)((__global const sample_t *)src_template + src_step); } dists[i] += col_dist - col_dists_current[first]; @@ -152,7 +170,7 @@ inline void calcElementInFirstRow(__global const uchar * src, int src_step, int } } -inline void calcElement(__global const uchar * src, int src_step, int src_offset, +inline void calcElement(__global const sample_t * src, int src_step, int src_offset, __local int * dists, int y, int x0, int x, int id, int first, __global int * col_dists, __global int * up_col_dists) { @@ -160,8 +178,8 @@ inline void calcElement(__global const uchar * src, int src_step, int src_offset int sy_up = y - TEMPLATE_SIZE2 - 1; int sy_down = y + TEMPLATE_SIZE2; - uchar_t up_value = *(__global const uchar_t *)(src + mad24(sy_up, src_step, mad24(cn, sx, src_offset))); - uchar_t down_value = *(__global const uchar_t *)(src + mad24(sy_down, src_step, mad24(cn, sx, src_offset))); + pixel_t up_value = *(__global const pixel_t *)(src + mad24(sy_up, src_step, mad24(cn, sx, 
src_offset))); + pixel_t down_value = *(__global const pixel_t *)(src + mad24(sy_down, src_step, mad24(cn, sx, src_offset))); sx -= SEARCH_SIZE2; sy_up -= SEARCH_SIZE2; @@ -171,8 +189,8 @@ inline void calcElement(__global const uchar * src, int src_step, int src_offset { int wx = i % SEARCH_SIZE, wy = i / SEARCH_SIZE; - uchar_t up_value_t = *(__global const uchar_t *)(src + mad24(sy_up + wy, src_step, mad24(cn, sx + wx, src_offset))); - uchar_t down_value_t = *(__global const uchar_t *)(src + mad24(sy_down + wy, src_step, mad24(cn, sx + wx, src_offset))); + pixel_t up_value_t = *(__global const pixel_t *)(src + mad24(sy_up + wy, src_step, mad24(cn, sx + wx, src_offset))); + pixel_t down_value_t = *(__global const pixel_t *)(src + mad24(sy_down + wy, src_step, mad24(cn, sx + wx, src_offset))); __global int * col_dists_current = col_dists + mad24(i, TEMPLATE_SIZE, first); __global int * up_col_dists_current = up_col_dists + mad24(x0, SEARCH_SIZE_SQ, i); @@ -185,9 +203,9 @@ inline void calcElement(__global const uchar * src, int src_step, int src_offset } } -inline void convolveWindow(__global const uchar * src, int src_step, int src_offset, +inline void convolveWindow(__global const sample_t * src, int src_step, int src_offset, __local int * dists, __global const int * almostDist2Weight, - __global uchar * dst, int dst_step, int dst_offset, + __global sample_t * dst, int dst_step, int dst_offset, int y, int x, int id, __local int * weights_local, __local int_t * weighted_sum_local, int almostTemplateWindowSizeSqBinShift) { @@ -197,7 +215,7 @@ inline void convolveWindow(__global const uchar * src, int src_step, int src_off for (int i = id; i < SEARCH_SIZE_SQ; i += CTA_SIZE) { int src_index = mad24(sy + i / SEARCH_SIZE, src_step, mad24(i % SEARCH_SIZE + sx, cn, src_offset)); - int_t src_value = convert_int_t(*(__global const uchar_t *)(src + src_index)); + int_t src_value = convert_int_t(*(__global const pixel_t *)(src + src_index)); int almostAvgDist = dists[i] >> 
almostTemplateWindowSizeSqBinShift; int weight = almostDist2Weight[almostAvgDist]; @@ -228,13 +246,13 @@ inline void convolveWindow(__global const uchar * src, int src_step, int src_off weighted_sum_local[2] + weighted_sum_local[3]; int weights_local_0 = weights_local[0] + weights_local[1] + weights_local[2] + weights_local[3]; - *(__global uchar_t *)(dst + dst_index) = convert_uchar_t(weighted_sum_local_0 / (int_t)(weights_local_0)); + *(__global pixel_t *)(dst + dst_index) = convert_pixel_t(weighted_sum_local_0 / (int_t)(weights_local_0)); } } -__kernel void fastNlMeansDenoising(__global const uchar * src, int src_step, int src_offset, - __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, - __global const int * almostDist2Weight, __global uchar * buffer, +__kernel void fastNlMeansDenoising(__global const sample_t * src, int src_step, int src_offset, + __global sample_t * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, + __global const int * almostDist2Weight, __global sample_t * buffer, int almostTemplateWindowSizeSqBinShift) { int block_x = get_group_id(0), nblocks_x = get_num_groups(0); From 9f7cac8c5933df74bf953227368b1bcd181d1b12 Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Sun, 1 Mar 2015 21:01:57 +0100 Subject: [PATCH 12/40] Addition of test cases for CV_8UC3 --- modules/photo/test/ocl/test_denoising.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/photo/test/ocl/test_denoising.cpp b/modules/photo/test/ocl/test_denoising.cpp index cb2d74f85..48efc8ab5 100644 --- a/modules/photo/test/ocl/test_denoising.cpp +++ b/modules/photo/test/ocl/test_denoising.cpp @@ -87,7 +87,7 @@ OCL_TEST_P(FastNlMeansDenoisingColored, Mat) } } -OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising, Combine(Values(1, 2), Bool())); +OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising, Combine(Values(1, 2, 3), Bool())); OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoisingColored, Combine(Values(3, 4),
Bool())); } } // namespace cvtest::ocl From a9ff335a8923c92e9dc86ddac3571aeaae6f0fbf Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Sun, 1 Mar 2015 22:21:36 +0100 Subject: [PATCH 13/40] Added OpenCL support for FastNlMeansDenoisingAbs --- modules/photo/src/denoising.cpp | 6 +++++- .../src/fast_nlmeans_denoising_opencl.hpp | 18 +++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/modules/photo/src/denoising.cpp b/modules/photo/src/denoising.cpp index 52065b5f6..3fe1f2b90 100644 --- a/modules/photo/src/denoising.cpp +++ b/modules/photo/src/denoising.cpp @@ -51,7 +51,7 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h, Size src_size = _src.size(); CV_OCL_RUN(_src.dims() <= 2 && (_src.isUMat() || _dst.isUMat()) && src_size.width > 5 && src_size.height > 5, // low accuracy on small sizes - ocl_fastNlMeansDenoising(_src, _dst, h, templateWindowSize, searchWindowSize)) + ocl_fastNlMeansDenoising(_src, _dst, h, templateWindowSize, searchWindowSize, false)) Mat src = _src.getMat(); _dst.create(src_size, src.type()); @@ -88,6 +88,10 @@ void cv::fastNlMeansDenoisingAbs( InputArray _src, OutputArray _dst, float h, int templateWindowSize, int searchWindowSize) { Size src_size = _src.size(); + CV_OCL_RUN(_src.dims() <= 2 && (_src.isUMat() || _dst.isUMat()) && + src_size.width > 5 && src_size.height > 5, // low accuracy on small sizes + ocl_fastNlMeansDenoising(_src, _dst, h, templateWindowSize, searchWindowSize, true)) + Mat src = _src.getMat(); _dst.create(src_size, src.type()); Mat dst = _dst.getMat(); diff --git a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp index cd7dde385..5e96533fb 100644 --- a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp @@ -30,7 +30,7 @@ static int divUp(int a, int b) template static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindowSize, int 
templateWindowSize, FT h, int cn, - int & almostTemplateWindowSizeSqBinShift) + int & almostTemplateWindowSizeSqBinShift, bool abs) { const int maxEstimateSumValue = searchWindowSize * searchWindowSize * 255; int fixedPointMult = std::numeric_limits::max() / maxEstimateSumValue; @@ -48,15 +48,15 @@ static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindow FT almostDist2ActualDistMultiplier = (FT)(1 << almostTemplateWindowSizeSqBinShift) / templateWindowSizeSq; const FT WEIGHT_THRESHOLD = 1e-3f; - int maxDist = 255 * 255 * cn; + int maxDist = abs ? 255 * cn : 255 * 255 * cn; int almostMaxDist = (int)(maxDist / almostDist2ActualDistMultiplier + 1); FT den = 1.0f / (h * h * cn); almostDist2Weight.create(1, almostMaxDist, CV_32SC1); ocl::Kernel k("calcAlmostDist2Weight", ocl::photo::nlmeans_oclsrc, - format("-D OP_CALC_WEIGHTS -D FT=%s%s", ocl::typeToStr(depth), - doubleSupport ? " -D DOUBLE_SUPPORT" : "")); + format("-D OP_CALC_WEIGHTS -D FT=%s%s%s", ocl::typeToStr(depth), + doubleSupport ? " -D DOUBLE_SUPPORT" : "", abs ? " -D ABS" : "")); if (k.empty()) return false; @@ -68,7 +68,7 @@ static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindow } static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, - int templateWindowSize, int searchWindowSize) + int templateWindowSize, int searchWindowSize, bool abs) { int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); int ctaSize = ocl::Device::getDefault().isIntel() ? 
CTA_SIZE_INTEL : CTA_SIZE_DEFAULT; @@ -89,21 +89,21 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, " -D sample_t=%s -D pixel_t=%s -D int_t=%s" " -D BLOCK_COLS=%d -D BLOCK_ROWS=%d" " -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d" - " -D convert_int_t=%s -D cn=%d -D convert_pixel_t=%s", + " -D convert_int_t=%s -D cn=%d -D convert_pixel_t=%s%s", templateWindowSize, searchWindowSize, ocl::typeToStr(depth), ocl::typeToStr(type), ocl::typeToStr(CV_32SC(cn)), BLOCK_COLS, BLOCK_ROWS, ctaSize, templateWindowHalfWize, searchWindowHalfSize, ocl::convertTypeStr(CV_8U, CV_32S, cn, cvt[0]), type == CV_8UC3 ? 4 : cn, - ocl::convertTypeStr(CV_32S, CV_8U, cn, cvt[1])); + ocl::convertTypeStr(CV_32S, CV_8U, cn, cvt[1]), abs ? " -D ABS" : ""); ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts); if (k.empty()) return false; UMat almostDist2Weight; - if (!ocl_calcAlmostDist2Weight(almostDist2Weight, searchWindowSize, templateWindowSize, h, cn, - almostTemplateWindowSizeSqBinShift)) + if (!ocl_calcAlmostDist2Weight(almostDist2Weight, searchWindowSize, templateWindowSize, + h, cn, almostTemplateWindowSizeSqBinShift, abs)) return false; CV_Assert(almostTemplateWindowSizeSqBinShift >= 0); From 3bde9e93651a0d2f388ba3b5be7e3c5d9de9820c Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Sun, 1 Mar 2015 22:22:09 +0100 Subject: [PATCH 14/40] Added test cases --- modules/photo/test/ocl/test_denoising.cpp | 51 +++++++++++++++++------ 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/modules/photo/test/ocl/test_denoising.cpp b/modules/photo/test/ocl/test_denoising.cpp index 48efc8ab5..30dc680c8 100644 --- a/modules/photo/test/ocl/test_denoising.cpp +++ b/modules/photo/test/ocl/test_denoising.cpp @@ -13,11 +13,11 @@ namespace cvtest { namespace ocl { -PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool) +PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool, bool) { int cn, templateWindowSize, 
searchWindowSize; float h; - bool use_roi; + bool use_roi, use_image; TEST_DECLARE_INPUT_PARAMETER(src); TEST_DECLARE_OUTPUT_PARAMETER(dst); @@ -26,6 +26,7 @@ PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool) { cn = GET_PARAM(0); use_roi = GET_PARAM(1); + use_image = GET_PARAM(2); templateWindowSize = 7; searchWindowSize = 21; @@ -34,20 +35,27 @@ PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool) virtual void generateTestData() { + const int type = CV_8UC(cn); Mat image; - if (cn == 1) - { - image = readImage("denoising/lena_noised_gaussian_sigma=10.png", IMREAD_GRAYSCALE); + + if (use_image) { + image = readImage("denoising/lena_noised_gaussian_sigma=10.png", + cn == 1 ? IMREAD_GRAYSCALE : IMREAD_COLOR); ASSERT_FALSE(image.empty()); } - const int type = CV_8UC(cn); - - Size roiSize = cn == 1 ? image.size() : randomSize(1, MAX_VALUE); + Size roiSize = use_image ? image.size() : randomSize(1, MAX_VALUE); Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); randomSubMat(src, src_roi, roiSize, srcBorder, type, 0, 255); - if (cn == 1) - image.copyTo(src_roi); + if (use_image) { + ASSERT_TRUE(cn == 1 || cn == 2 || cn == 3); + if (cn == 2) { + int from_to[] = { 0,0, 1,1 }; + src_roi.create(roiSize, type); + mixChannels(&image, 1, &src_roi, 1, from_to, 2); + } + else image.copyTo(src_roi); + } Border dstBorder = randomBorder(0, use_roi ? 
MAX_VALUE : 0); randomSubMat(dst, dst_roi, roiSize, dstBorder, type, 0, 255); @@ -72,6 +80,21 @@ OCL_TEST_P(FastNlMeansDenoising, Mat) } } +typedef FastNlMeansDenoisingTestBase FastNlMeansDenoisingAbs; + +OCL_TEST_P(FastNlMeansDenoisingAbs, Mat) +{ + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(); + + OCL_OFF(cv::fastNlMeansDenoisingAbs(src_roi, dst_roi, h, templateWindowSize, searchWindowSize)); + OCL_ON(cv::fastNlMeansDenoisingAbs(usrc_roi, udst_roi, h, templateWindowSize, searchWindowSize)); + + OCL_EXPECT_MATS_NEAR(dst, 1); + } +} + typedef FastNlMeansDenoisingTestBase FastNlMeansDenoisingColored; OCL_TEST_P(FastNlMeansDenoisingColored, Mat) @@ -87,8 +110,12 @@ OCL_TEST_P(FastNlMeansDenoisingColored, Mat) } } -OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising, Combine(Values(1, 2, 3), Bool())); -OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoisingColored, Combine(Values(3, 4), Bool())); +OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising, + Combine(Values(1, 2, 3), Bool(), Bool())); +OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoisingAbs, + Combine(Values(1, 2, 3), Bool(), Bool())); +OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoisingColored, + Combine(Values(3, 4), Bool(), Values(false))); } } // namespace cvtest::ocl From 73663dcdd1f0f06a0567f266c4f9ebeb9b74a2b2 Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Mon, 2 Mar 2015 03:29:17 +0100 Subject: [PATCH 15/40] Added support for 16-bit input --- .../src/fast_nlmeans_denoising_opencl.hpp | 57 ++++++++++++------- modules/photo/src/opencl/nlmeans.cl | 31 ++++++---- 2 files changed, 56 insertions(+), 32 deletions(-) diff --git a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp index 5e96533fb..a88b5cfd7 100644 --- a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp @@ -28,12 +28,14 @@ static int divUp(int a, int b) return (a + b - 1) / b; } -template 
+template static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindowSize, int templateWindowSize, FT h, int cn, int & almostTemplateWindowSizeSqBinShift, bool abs) { - const int maxEstimateSumValue = searchWindowSize * searchWindowSize * 255; - int fixedPointMult = std::numeric_limits::max() / maxEstimateSumValue; + const WT maxEstimateSumValue = searchWindowSize * searchWindowSize * + std::numeric_limits::max(); + int fixedPointMult = (int)std::min(std::numeric_limits::max() / maxEstimateSumValue, + std::numeric_limits::max()); int depth = DataType::depth; bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; @@ -48,7 +50,8 @@ static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindow FT almostDist2ActualDistMultiplier = (FT)(1 << almostTemplateWindowSizeSqBinShift) / templateWindowSizeSq; const FT WEIGHT_THRESHOLD = 1e-3f; - int maxDist = abs ? 255 * cn : 255 * 255 * cn; + int maxDist = abs ? std::numeric_limits::max() * cn : + std::numeric_limits::max() * std::numeric_limits::max() * cn; int almostMaxDist = (int)(maxDist / almostDist2ActualDistMultiplier + 1); FT den = 1.0f / (h * h * cn); @@ -74,7 +77,7 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, int ctaSize = ocl::Device::getDefault().isIntel() ? 
CTA_SIZE_INTEL : CTA_SIZE_DEFAULT; Size size = _src.size(); - if ( type != CV_8UC1 && type != CV_8UC2 && type != CV_8UC3 ) + if (cn != 1 && cn != 2 && cn != 3 && depth != CV_8U && (!abs || depth != CV_16U)) return false; int templateWindowHalfWize = templateWindowSize / 2; @@ -84,45 +87,60 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, int nblocksx = divUp(size.width, BLOCK_COLS), nblocksy = divUp(size.height, BLOCK_ROWS); int almostTemplateWindowSizeSqBinShift = -1; - char cvt[2][40]; + char buf[4][40]; String opts = format("-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d" " -D sample_t=%s -D pixel_t=%s -D int_t=%s" + " -D weight_t=%s -D sum_t=%s -D convert_sum_t=%s" " -D BLOCK_COLS=%d -D BLOCK_ROWS=%d" " -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d" " -D convert_int_t=%s -D cn=%d -D convert_pixel_t=%s%s", templateWindowSize, searchWindowSize, ocl::typeToStr(depth), ocl::typeToStr(type), ocl::typeToStr(CV_32SC(cn)), + depth == CV_8U ? ocl::typeToStr(CV_32S) : "long", + depth == CV_8U ? ocl::typeToStr(CV_32SC(cn)) : + (sprintf(buf[0], "long%d", cn), buf[0]), + depth == CV_8U ? ocl::convertTypeStr(depth, CV_32S, cn, buf[1]) : + (sprintf(buf[1], "convert_long%d", cn), buf[1]), BLOCK_COLS, BLOCK_ROWS, ctaSize, templateWindowHalfWize, searchWindowHalfSize, - ocl::convertTypeStr(CV_8U, CV_32S, cn, cvt[0]), type == CV_8UC3 ? 4 : cn, - ocl::convertTypeStr(CV_32S, CV_8U, cn, cvt[1]), abs ? " -D ABS" : ""); + ocl::convertTypeStr(depth, CV_32S, cn, buf[2]), cn == 3 ? 4 : cn, + ocl::convertTypeStr(CV_32S, depth, cn, buf[3]), abs ? 
" -D ABS" : ""); ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts); if (k.empty()) return false; UMat almostDist2Weight; - if (!ocl_calcAlmostDist2Weight(almostDist2Weight, searchWindowSize, templateWindowSize, - h, cn, almostTemplateWindowSizeSqBinShift, abs)) + if ((depth == CV_8U && + !ocl_calcAlmostDist2Weight(almostDist2Weight, + searchWindowSize, templateWindowSize, + h, cn, + almostTemplateWindowSizeSqBinShift, + abs)) || + (depth == CV_16U && + !ocl_calcAlmostDist2Weight(almostDist2Weight, + searchWindowSize, templateWindowSize, + h, cn, + almostTemplateWindowSizeSqBinShift, + abs))) return false; CV_Assert(almostTemplateWindowSizeSqBinShift >= 0); UMat srcex; int borderSize = searchWindowHalfSize + templateWindowHalfWize; - if (type == CV_8UC3) { - Mat src_rgb = _src.getMat(), src_rgba(size, CV_8UC4); + if (cn == 3) { + UMat tmp(size, CV_MAKE_TYPE(depth, 4)); int from_to[] = { 0,0, 1,1, 2,2 }; - mixChannels(&src_rgb, 1, &src_rgba, 1, from_to, 3); - copyMakeBorder(src_rgba, srcex, - borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT); + mixChannels(std::vector(1, _src.getUMat()), std::vector(1, tmp), from_to, 3); + copyMakeBorder(tmp, srcex, borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT); } else copyMakeBorder(_src, srcex, borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT); _dst.create(size, type); UMat dst; - if (type == CV_8UC3) - dst.create(size, CV_8UC4); + if (cn == 3) + dst.create(size, CV_MAKE_TYPE(depth, 4)); else dst = _dst.getUMat(); @@ -139,10 +157,9 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, size_t globalsize[2] = { nblocksx * ctaSize, nblocksy }, localsize[2] = { ctaSize, 1 }; if (!k.run(2, globalsize, localsize, false)) return false; - if (type == CV_8UC3) { - Mat dst_rgba = dst.getMat(ACCESS_READ), dst_rgb = _dst.getMat(); + if (cn == 3) { int from_to[] = { 0,0, 1,1, 2,2 }; - mixChannels(&dst_rgba, 1, &dst_rgb, 1, from_to, 3); + 
mixChannels(std::vector(1, dst), std::vector(1, _dst.getUMat()), from_to, 3); } return true; diff --git a/modules/photo/src/opencl/nlmeans.cl b/modules/photo/src/opencl/nlmeans.cl index c48adda0b..3a104c42a 100644 --- a/modules/photo/src/opencl/nlmeans.cl +++ b/modules/photo/src/opencl/nlmeans.cl @@ -206,22 +206,23 @@ inline void calcElement(__global const sample_t * src, int src_step, int src_off inline void convolveWindow(__global const sample_t * src, int src_step, int src_offset, __local int * dists, __global const int * almostDist2Weight, __global sample_t * dst, int dst_step, int dst_offset, - int y, int x, int id, __local int * weights_local, - __local int_t * weighted_sum_local, int almostTemplateWindowSizeSqBinShift) + int y, int x, int id, __local weight_t * weights_local, + __local sum_t * weighted_sum_local, int almostTemplateWindowSizeSqBinShift) { - int sx = x - SEARCH_SIZE2, sy = y - SEARCH_SIZE2, weights = 0; - int_t weighted_sum = (int_t)(0); + int sx = x - SEARCH_SIZE2, sy = y - SEARCH_SIZE2; + weight_t weights = 0; + sum_t weighted_sum = (sum_t)(0); for (int i = id; i < SEARCH_SIZE_SQ; i += CTA_SIZE) { int src_index = mad24(sy + i / SEARCH_SIZE, src_step, mad24(i % SEARCH_SIZE + sx, cn, src_offset)); - int_t src_value = convert_int_t(*(__global const pixel_t *)(src + src_index)); + sum_t src_value = convert_sum_t(*(__global const pixel_t *)(src + src_index)); int almostAvgDist = dists[i] >> almostTemplateWindowSizeSqBinShift; int weight = almostDist2Weight[almostAvgDist]; - weights += weight; - weighted_sum += (int_t)(weight) * src_value; + weights += (weight_t)weight; + weighted_sum += (sum_t)(weight) * src_value; } weights_local[id] = weights; @@ -242,11 +243,11 @@ inline void convolveWindow(__global const sample_t * src, int src_step, int src_ if (id == 0) { int dst_index = mad24(y, dst_step, mad24(cn, x, dst_offset)); - int_t weighted_sum_local_0 = weighted_sum_local[0] + weighted_sum_local[1] + + sum_t weighted_sum_local_0 = 
weighted_sum_local[0] + weighted_sum_local[1] + weighted_sum_local[2] + weighted_sum_local[3]; - int weights_local_0 = weights_local[0] + weights_local[1] + weights_local[2] + weights_local[3]; + weight_t weights_local_0 = weights_local[0] + weights_local[1] + weights_local[2] + weights_local[3]; - *(__global pixel_t *)(dst + dst_index) = convert_pixel_t(weighted_sum_local_0 / (int_t)(weights_local_0)); + *(__global pixel_t *)(dst + dst_index) = convert_pixel_t(weighted_sum_local_0 / (sum_t)(weights_local_0)); } } @@ -259,8 +260,9 @@ __kernel void fastNlMeansDenoising(__global const sample_t * src, int src_step, int block_y = get_group_id(1); int id = get_local_id(0), first; - __local int dists[SEARCH_SIZE_SQ], weights[CTA_SIZE]; - __local int_t weighted_sum[CTA_SIZE]; + __local int dists[SEARCH_SIZE_SQ]; + __local weight_t weights[CTA_SIZE]; + __local sum_t weighted_sum[CTA_SIZE]; int x0 = block_x * BLOCK_COLS, x1 = min(x0 + BLOCK_COLS, dst_cols); int y0 = block_y * BLOCK_ROWS, y1 = min(y0 + BLOCK_ROWS, dst_rows); @@ -271,6 +273,11 @@ __kernel void fastNlMeansDenoising(__global const sample_t * src, int src_step, __global int * col_dists = (__global int *)(buffer + block_data_start * sizeof(int)); __global int * up_col_dists = col_dists + SEARCH_SIZE_SQ * TEMPLATE_SIZE; + src_step /= sizeof(sample_t); + src_offset /= sizeof(sample_t); + dst_step /= sizeof(sample_t); + dst_offset /= sizeof(sample_t); + for (int y = y0; y < y1; ++y) for (int x = x0; x < x1; ++x) { From 50bb14a0a8642ffdf71969c78226ddd236bf97b9 Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Mon, 2 Mar 2015 15:48:00 +0100 Subject: [PATCH 16/40] Avoiding unnecessary copy by creating borders in place after RGB -> RGBA conversion --- modules/photo/src/fast_nlmeans_denoising_opencl.hpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp index a88b5cfd7..9c0e40401 100644 --- 
a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp @@ -129,10 +129,12 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, UMat srcex; int borderSize = searchWindowHalfSize + templateWindowHalfWize; if (cn == 3) { - UMat tmp(size, CV_MAKE_TYPE(depth, 4)); + srcex.create(size.height + 2*borderSize, size.width + 2*borderSize, CV_MAKE_TYPE(depth, 4)); + UMat src(srcex, Rect(borderSize, borderSize, size.width, size.height)); int from_to[] = { 0,0, 1,1, 2,2 }; - mixChannels(std::vector(1, _src.getUMat()), std::vector(1, tmp), from_to, 3); - copyMakeBorder(tmp, srcex, borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT); + mixChannels(std::vector(1, _src.getUMat()), std::vector(1, src), from_to, 3); + copyMakeBorder(src, srcex, borderSize, borderSize, borderSize, borderSize, + BORDER_DEFAULT|BORDER_ISOLATED); // create borders in place } else copyMakeBorder(_src, srcex, borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT); From 87760d13fbee8b005800c55246fa59a3e4cc8685 Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Mon, 2 Mar 2015 22:33:14 +0100 Subject: [PATCH 17/40] Cleanup and addition of 4-component support for ocl_fastNlMeansDenoising --- .../src/fast_nlmeans_denoising_opencl.hpp | 6 ++-- modules/photo/src/opencl/nlmeans.cl | 28 +++++++++++-------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp index 9c0e40401..41264045c 100644 --- a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp @@ -77,7 +77,7 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, int ctaSize = ocl::Device::getDefault().isIntel() ? 
CTA_SIZE_INTEL : CTA_SIZE_DEFAULT; Size size = _src.size(); - if (cn != 1 && cn != 2 && cn != 3 && depth != CV_8U && (!abs || depth != CV_16U)) + if (cn != 1 && cn != 2 && cn != 3 && cn != 4 && depth != CV_8U && (!abs || depth != CV_16U)) return false; int templateWindowHalfWize = templateWindowSize / 2; @@ -93,7 +93,7 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, " -D weight_t=%s -D sum_t=%s -D convert_sum_t=%s" " -D BLOCK_COLS=%d -D BLOCK_ROWS=%d" " -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d" - " -D convert_int_t=%s -D cn=%d -D convert_pixel_t=%s%s", + " -D convert_int_t=%s -D cn=%d -D psz=%d -D convert_pixel_t=%s%s", templateWindowSize, searchWindowSize, ocl::typeToStr(depth), ocl::typeToStr(type), ocl::typeToStr(CV_32SC(cn)), depth == CV_8U ? ocl::typeToStr(CV_32S) : "long", @@ -103,7 +103,7 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, (sprintf(buf[1], "convert_long%d", cn), buf[1]), BLOCK_COLS, BLOCK_ROWS, ctaSize, templateWindowHalfWize, searchWindowHalfSize, - ocl::convertTypeStr(depth, CV_32S, cn, buf[2]), cn == 3 ? 4 : cn, + ocl::convertTypeStr(depth, CV_32S, cn, buf[2]), cn, cn == 3 ? 4 : cn, ocl::convertTypeStr(CV_32S, depth, cn, buf[3]), abs ? 
" -D ABS" : ""); ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts); diff --git a/modules/photo/src/opencl/nlmeans.cl b/modules/photo/src/opencl/nlmeans.cl index 3a104c42a..91b012354 100644 --- a/modules/photo/src/opencl/nlmeans.cl +++ b/modules/photo/src/opencl/nlmeans.cl @@ -60,8 +60,10 @@ inline int calcDist(pixel_t a, pixel_t b) return retval; #elif cn == 2 return retval.x + retval.y; -#elif cn == 3 || cn == 4 /* A is ignored */ +#elif cn == 3 return retval.x + retval.y + retval.z; +#elif cn == 4 + return retval.x + retval.y + retval.z + retval.w; #else #error "cn should be either 1, 2, 3 or 4" #endif @@ -83,8 +85,10 @@ inline int calcDistUpDown(pixel_t down_value, pixel_t down_value_t, pixel_t up_v return retval; #elif cn == 2 return retval.x + retval.y; -#elif cn == 3 || cn == 4 /* A is ignored */ +#elif cn == 3 return retval.x + retval.y + retval.z; +#elif cn == 4 + return retval.x + retval.y + retval.z + retval.w; #else #error "cn should be either 1, 2, 3 or 4" #endif @@ -106,8 +110,8 @@ inline void calcFirstElementInRow(__global const sample_t * src, int src_step, i int dist = 0, value; __global const pixel_t * src_template = (__global const pixel_t *)(src + - mad24(sy + i / SEARCH_SIZE, src_step, mad24(cn, sx + i % SEARCH_SIZE, src_offset))); - __global const pixel_t * src_current = (__global const pixel_t *)(src + mad24(y, src_step, mad24(cn, x, src_offset))); + mad24(sy + i / SEARCH_SIZE, src_step, mad24(psz, sx + i % SEARCH_SIZE, src_offset))); + __global const pixel_t * src_current = (__global const pixel_t *)(src + mad24(y, src_step, mad24(psz, x, src_offset))); __global int * col_dists_current = col_dists + i * TEMPLATE_SIZE; #pragma unroll @@ -148,9 +152,9 @@ inline void calcElementInFirstRow(__global const sample_t * src, int src_step, i for (int i = id; i < SEARCH_SIZE_SQ; i += CTA_SIZE) { - __global const pixel_t * src_current = (__global const pixel_t *)(src + mad24(y, src_step, mad24(cn, x, src_offset))); + __global const 
pixel_t * src_current = (__global const pixel_t *)(src + mad24(y, src_step, mad24(psz, x, src_offset))); __global const pixel_t * src_template = (__global const pixel_t *)(src + - mad24(sy + i / SEARCH_SIZE, src_step, mad24(cn, sx + i % SEARCH_SIZE, src_offset))); + mad24(sy + i / SEARCH_SIZE, src_step, mad24(psz, sx + i % SEARCH_SIZE, src_offset))); __global int * col_dists_current = col_dists + TEMPLATE_SIZE * i; int col_dist = 0; @@ -178,8 +182,8 @@ inline void calcElement(__global const sample_t * src, int src_step, int src_off int sy_up = y - TEMPLATE_SIZE2 - 1; int sy_down = y + TEMPLATE_SIZE2; - pixel_t up_value = *(__global const pixel_t *)(src + mad24(sy_up, src_step, mad24(cn, sx, src_offset))); - pixel_t down_value = *(__global const pixel_t *)(src + mad24(sy_down, src_step, mad24(cn, sx, src_offset))); + pixel_t up_value = *(__global const pixel_t *)(src + mad24(sy_up, src_step, mad24(psz, sx, src_offset))); + pixel_t down_value = *(__global const pixel_t *)(src + mad24(sy_down, src_step, mad24(psz, sx, src_offset))); sx -= SEARCH_SIZE2; sy_up -= SEARCH_SIZE2; @@ -189,8 +193,8 @@ inline void calcElement(__global const sample_t * src, int src_step, int src_off { int wx = i % SEARCH_SIZE, wy = i / SEARCH_SIZE; - pixel_t up_value_t = *(__global const pixel_t *)(src + mad24(sy_up + wy, src_step, mad24(cn, sx + wx, src_offset))); - pixel_t down_value_t = *(__global const pixel_t *)(src + mad24(sy_down + wy, src_step, mad24(cn, sx + wx, src_offset))); + pixel_t up_value_t = *(__global const pixel_t *)(src + mad24(sy_up + wy, src_step, mad24(psz, sx + wx, src_offset))); + pixel_t down_value_t = *(__global const pixel_t *)(src + mad24(sy_down + wy, src_step, mad24(psz, sx + wx, src_offset))); __global int * col_dists_current = col_dists + mad24(i, TEMPLATE_SIZE, first); __global int * up_col_dists_current = up_col_dists + mad24(x0, SEARCH_SIZE_SQ, i); @@ -215,7 +219,7 @@ inline void convolveWindow(__global const sample_t * src, int src_step, int src_ for (int i 
= id; i < SEARCH_SIZE_SQ; i += CTA_SIZE) { - int src_index = mad24(sy + i / SEARCH_SIZE, src_step, mad24(i % SEARCH_SIZE + sx, cn, src_offset)); + int src_index = mad24(sy + i / SEARCH_SIZE, src_step, mad24(i % SEARCH_SIZE + sx, psz, src_offset)); sum_t src_value = convert_sum_t(*(__global const pixel_t *)(src + src_index)); int almostAvgDist = dists[i] >> almostTemplateWindowSizeSqBinShift; @@ -242,7 +246,7 @@ inline void convolveWindow(__global const sample_t * src, int src_step, int src_ if (id == 0) { - int dst_index = mad24(y, dst_step, mad24(cn, x, dst_offset)); + int dst_index = mad24(y, dst_step, mad24(psz, x, dst_offset)); sum_t weighted_sum_local_0 = weighted_sum_local[0] + weighted_sum_local[1] + weighted_sum_local[2] + weighted_sum_local[3]; weight_t weights_local_0 = weights_local[0] + weights_local[1] + weights_local[2] + weights_local[3]; From ae08884854a7b46db96eef489b6a943d1bb04f56 Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Tue, 3 Mar 2015 01:19:34 +0100 Subject: [PATCH 18/40] Added support for 4-component input for fastNlMeansDenoising[Multi][Abs] --- modules/photo/src/denoising.cpp | 41 ++++++++++++++-- .../src/fast_nlmeans_denoising_invoker.hpp | 2 +- ...fast_nlmeans_denoising_invoker_commons.hpp | 48 +++++++++++++++++++ .../fast_nlmeans_multi_denoising_invoker.hpp | 2 +- 4 files changed, 87 insertions(+), 6 deletions(-) diff --git a/modules/photo/src/denoising.cpp b/modules/photo/src/denoising.cpp index 3fe1f2b90..b41f83ec9 100644 --- a/modules/photo/src/denoising.cpp +++ b/modules/photo/src/denoising.cpp @@ -78,9 +78,14 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h, FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, h)); break; + case CV_8UC4: + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, h)); + break; default: CV_Error(Error::StsBadArg, - "Unsupported image format! 
Only CV_8U, CV_8UC2, and CV_8UC3 are supported"); + "Unsupported image format! Only CV_8U, CV_8UC2, CV_8UC3 and CV_8UC4 are supported"); } } @@ -112,6 +117,11 @@ void cv::fastNlMeansDenoisingAbs( InputArray _src, OutputArray _dst, float h, FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, h)); break; + case CV_8UC4: + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, h)); + break; case CV_16U: parallel_for_(cv::Range(0, src.rows), FastNlMeansDenoisingInvoker( @@ -127,9 +137,14 @@ void cv::fastNlMeansDenoisingAbs( InputArray _src, OutputArray _dst, float h, FastNlMeansDenoisingInvoker, int64, uint64, DistAbs>( src, dst, templateWindowSize, searchWindowSize, h)); break; + case CV_16UC4: + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker, int64, uint64, DistAbs>( + src, dst, templateWindowSize, searchWindowSize, h)); + break; default: CV_Error(Error::StsBadArg, - "Unsupported image format! Only CV_8U, CV_8UC2, CV_8UC3, CV_16U, CV_16UC2, and CV_16UC3 are supported"); + "Unsupported image format! Only CV_8U, CV_8UC2, CV_8UC3, CV_8UC4, CV_16U, CV_16UC2, CV_16UC3 and CV_16UC4 are supported"); } } @@ -240,9 +255,15 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, h)); break; + case CV_8UC4: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, h)); + break; default: CV_Error(Error::StsBadArg, - "Unsupported image format! Only CV_8U, CV_8UC2, and CV_8UC3 are supported"); + "Unsupported image format! 
Only CV_8U, CV_8UC2, CV_8UC3 and CV_8UC4 are supported"); } } @@ -280,6 +301,12 @@ void cv::fastNlMeansDenoisingMultiAbs( InputArrayOfArrays _srcImgs, OutputArray srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, h)); break; + case CV_8UC4: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, h)); + break; case CV_16U: parallel_for_(cv::Range(0, srcImgs[0].rows), FastNlMeansMultiDenoisingInvoker( @@ -298,9 +325,15 @@ void cv::fastNlMeansDenoisingMultiAbs( InputArrayOfArrays _srcImgs, OutputArray srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, h)); break; + case CV_16UC4: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs>( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, h)); + break; default: CV_Error(Error::StsBadArg, - "Unsupported image format! Only CV_8U, CV_8UC2, CV_8UC3, CV_16U, CV_16UC2, and CV_16UC3 are supported"); + "Unsupported image format! 
Only CV_8U, CV_8UC2, CV_8UC3, CV_8UC4, CV_16U, CV_16UC2, CV_16UC3 and CV_16UC4 are supported"); } } diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp index 468fa82f7..01588b03d 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp @@ -227,7 +227,7 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) } // calc weights - IT estimation[3], weights_sum = 0; + IT estimation[pixelInfo::channels], weights_sum = 0; for (size_t channel_num = 0; channel_num < pixelInfo::channels; channel_num++) estimation[channel_num] = 0; diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp index d55d93ce7..d77ca3e1f 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp @@ -110,6 +110,18 @@ class DistAbs } }; + template struct calcDist_, IT> + { + static inline IT f(const Vec a, const Vec b) + { + return + std::abs((IT)(a[0]-b[0])) + + std::abs((IT)(a[1]-b[1])) + + std::abs((IT)(a[2]-b[2])) + + std::abs((IT)(a[3]-b[3])); + } + }; + public: template static inline IT calcDist(const T a, const T b) { @@ -172,6 +184,18 @@ class DistSquared } }; + template struct calcDist_, IT> + { + static inline IT f(const Vec a, const Vec b) + { + return + (IT)(a[0]-b[0])*(IT)(a[0]-b[0]) + + (IT)(a[1]-b[1])*(IT)(a[1]-b[1]) + + (IT)(a[2]-b[2])*(IT)(a[2]-b[2]) + + (IT)(a[3]-b[3])*(IT)(a[3]-b[3]); + } + }; + template struct calcUpDownDist_ { static inline IT f(T a_up, T a_down, T b_up, T b_down) @@ -254,6 +278,17 @@ template struct incWithWeight_, IT> } }; +template struct incWithWeight_, IT> +{ + static inline void f(IT* estimation, IT weight, Vec p) + { + estimation[0] += weight * p[0]; + estimation[1] += weight * p[1]; + estimation[2] += weight * p[2]; + estimation[3] += weight * p[3]; 
+ } +}; + template static inline void incWithWeight(IT* estimation, IT weight, T p) { @@ -291,6 +326,19 @@ template struct saturateCastFromArray_, IT } }; +template struct saturateCastFromArray_, IT> +{ + static inline Vec f(IT* estimation) + { + Vec res; + res[0] = saturate_cast(estimation[0]); + res[1] = saturate_cast(estimation[1]); + res[2] = saturate_cast(estimation[2]); + res[3] = saturate_cast(estimation[3]); + return res; + } +}; + template static inline T saturateCastFromArray(IT* estimation) { return saturateCastFromArray_::f(estimation); diff --git a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp index 0a2bdd739..eb2078643 100644 --- a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp @@ -249,7 +249,7 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& r // calc weights IT weights_sum = 0; - IT estimation[3]; + IT estimation[pixelInfo::channels]; for (size_t channel_num = 0; channel_num < pixelInfo::channels; channel_num++) estimation[channel_num] = 0; From ac6771f975144c00c153431687dce6ecc45303cf Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Tue, 3 Mar 2015 01:20:33 +0100 Subject: [PATCH 19/40] Added test cases --- modules/photo/test/ocl/test_denoising.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/modules/photo/test/ocl/test_denoising.cpp b/modules/photo/test/ocl/test_denoising.cpp index 30dc680c8..4aba4b51e 100644 --- a/modules/photo/test/ocl/test_denoising.cpp +++ b/modules/photo/test/ocl/test_denoising.cpp @@ -48,12 +48,17 @@ PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool, bool) Border srcBorder = randomBorder(0, use_roi ? 
MAX_VALUE : 0); randomSubMat(src, src_roi, roiSize, srcBorder, type, 0, 255); if (use_image) { - ASSERT_TRUE(cn == 1 || cn == 2 || cn == 3); + ASSERT_TRUE(cn == 1 || cn == 2 || cn == 3 || cn == 4); if (cn == 2) { int from_to[] = { 0,0, 1,1 }; src_roi.create(roiSize, type); mixChannels(&image, 1, &src_roi, 1, from_to, 2); } + else if (cn == 4) { + int from_to[] = { 0,0, 1,1, 2,2, 1,3}; + src_roi.create(roiSize, type); + mixChannels(&image, 1, &src_roi, 1, from_to, 4); + } else image.copyTo(src_roi); } @@ -111,9 +116,9 @@ OCL_TEST_P(FastNlMeansDenoisingColored, Mat) } OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising, - Combine(Values(1, 2, 3), Bool(), Bool())); + Combine(Values(1, 2, 3, 4), Bool(), Bool())); OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoisingAbs, - Combine(Values(1, 2, 3), Bool(), Bool())); + Combine(Values(1, 2, 3, 4), Bool(), Bool())); OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoisingColored, Combine(Values(3, 4), Bool(), Values(false))); From d56d04e41bfc88dd26aa4b9799e0f6922266183a Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Tue, 3 Mar 2015 01:34:29 +0100 Subject: [PATCH 20/40] Updated documentation --- modules/photo/include/opencv2/photo.hpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/modules/photo/include/opencv2/photo.hpp b/modules/photo/include/opencv2/photo.hpp index c25a35e6d..446e81750 100644 --- a/modules/photo/include/opencv2/photo.hpp +++ b/modules/photo/include/opencv2/photo.hpp @@ -119,7 +119,7 @@ CV_EXPORTS_W void inpaint( InputArray src, InputArray inpaintMask, with several computational optimizations. Noise expected to be a gaussian white noise -@param src Input 8-bit 1-channel, 2-channel or 3-channel image. +@param src Input 8-bit 1-channel, 2-channel, 3-channel or 4-channel image. @param dst Output image with the same size and type as src . @param templateWindowSize Size in pixels of the template patch that is used to compute weights. Should be odd. 
Recommended value 7 pixels @@ -144,7 +144,7 @@ with several computational optimizations. Noise expected to be a gaussian white noise. Uses squared sum of absolute value distances instead of sum of squared distances for weight calculation -@param src Input 8-bit or 16-bit 1-channel, 2-channel or 3-channel image. +@param src Input 8-bit or 16-bit 1-channel, 2-channel, 3-channel or 4-channel image. @param dst Output image with the same size and type as src . @param templateWindowSize Size in pixels of the template patch that is used to compute weights. Should be odd. Recommended value 7 pixels @@ -190,8 +190,9 @@ captured in small period of time. For example video. This version of the functio images or for manual manipulation with colorspaces. For more details see -@param srcImgs Input 8-bit 1-channel, 2-channel or 3-channel images sequence. All images should -have the same type and size. +@param srcImgs Input 8-bit 1-channel, 2-channel, 3-channel or +4-channel images sequence. All images should have the same type and +size. @param imgToDenoiseIndex Target image to denoise index in srcImgs sequence @param temporalWindowSize Number of surrounding images to use for target image denoising. Should be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to @@ -220,9 +221,9 @@ details see squared sum of absolute value distances instead of sum of squared distances for weight calculation -@param srcImgs Input 8-bit or 16-bit 1-channel, 2-channel or 3-channel -images sequence. All images should -have the same type and size. +@param srcImgs Input 8-bit or 16-bit 1-channel, 2-channel, 3-channel +or 4-channel images sequence. All images should have the same type and +size. @param imgToDenoiseIndex Target image to denoise index in srcImgs sequence @param temporalWindowSize Number of surrounding images to use for target image denoising. Should be odd. 
Images from imgToDenoiseIndex - temporalWindowSize / 2 to From 69eae13ff3f6115b6716c2dc1927b679580f9ced Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Tue, 3 Mar 2015 03:02:44 +0100 Subject: [PATCH 21/40] Changed pointers from sample_t * to uchar *. Rescaling psz accordingly. --- .../src/fast_nlmeans_denoising_opencl.hpp | 7 +++-- modules/photo/src/opencl/nlmeans.cl | 29 ++++++++----------- 2 files changed, 16 insertions(+), 20 deletions(-) diff --git a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp index 41264045c..2fa11a351 100644 --- a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp @@ -89,13 +89,13 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, char buf[4][40]; String opts = format("-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d" - " -D sample_t=%s -D pixel_t=%s -D int_t=%s" + " -D pixel_t=%s -D int_t=%s" " -D weight_t=%s -D sum_t=%s -D convert_sum_t=%s" " -D BLOCK_COLS=%d -D BLOCK_ROWS=%d" " -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d" " -D convert_int_t=%s -D cn=%d -D psz=%d -D convert_pixel_t=%s%s", templateWindowSize, searchWindowSize, - ocl::typeToStr(depth), ocl::typeToStr(type), ocl::typeToStr(CV_32SC(cn)), + ocl::typeToStr(type), ocl::typeToStr(CV_32SC(cn)), depth == CV_8U ? ocl::typeToStr(CV_32S) : "long", depth == CV_8U ? ocl::typeToStr(CV_32SC(cn)) : (sprintf(buf[0], "long%d", cn), buf[0]), @@ -103,7 +103,8 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, (sprintf(buf[1], "convert_long%d", cn), buf[1]), BLOCK_COLS, BLOCK_ROWS, ctaSize, templateWindowHalfWize, searchWindowHalfSize, - ocl::convertTypeStr(depth, CV_32S, cn, buf[2]), cn, cn == 3 ? 4 : cn, + ocl::convertTypeStr(depth, CV_32S, cn, buf[2]), cn, + (depth == CV_8U ? sizeof(uchar) : sizeof(ushort)) * (cn == 3 ? 4 : cn), ocl::convertTypeStr(CV_32S, depth, cn, buf[3]), abs ? 
" -D ABS" : ""); ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts); diff --git a/modules/photo/src/opencl/nlmeans.cl b/modules/photo/src/opencl/nlmeans.cl index 91b012354..11837a5fc 100644 --- a/modules/photo/src/opencl/nlmeans.cl +++ b/modules/photo/src/opencl/nlmeans.cl @@ -97,7 +97,7 @@ inline int calcDistUpDown(pixel_t down_value, pixel_t down_value_t, pixel_t up_v #define COND if (x == 0 && y == 0) -inline void calcFirstElementInRow(__global const sample_t * src, int src_step, int src_offset, +inline void calcFirstElementInRow(__global const uchar * src, int src_step, int src_offset, __local int * dists, int y, int x, int id, __global int * col_dists, __global int * up_col_dists) { @@ -129,8 +129,8 @@ inline void calcFirstElementInRow(__global const sample_t * src, int src_step, i dist += value; } - src_current = (__global const pixel_t *)((__global const sample_t *)src_current + src_step); - src_template = (__global const pixel_t *)((__global const sample_t *)src_template + src_step); + src_current = (__global const pixel_t *)((__global const uchar *)src_current + src_step); + src_template = (__global const pixel_t *)((__global const uchar *)src_template + src_step); } #pragma unroll @@ -142,7 +142,7 @@ inline void calcFirstElementInRow(__global const sample_t * src, int src_step, i } } -inline void calcElementInFirstRow(__global const sample_t * src, int src_step, int src_offset, +inline void calcElementInFirstRow(__global const uchar * src, int src_step, int src_offset, __local int * dists, int y, int x0, int x, int id, int first, __global int * col_dists, __global int * up_col_dists) { @@ -164,8 +164,8 @@ inline void calcElementInFirstRow(__global const sample_t * src, int src_step, i { col_dist += calcDist(src_current[0], src_template[0]); - src_current = (__global const pixel_t *)((__global const sample_t *)src_current + src_step); - src_template = (__global const pixel_t *)((__global const sample_t *)src_template + src_step); + 
src_current = (__global const pixel_t *)((__global const uchar *)src_current + src_step); + src_template = (__global const pixel_t *)((__global const uchar *)src_template + src_step); } dists[i] += col_dist - col_dists_current[first]; @@ -174,7 +174,7 @@ inline void calcElementInFirstRow(__global const sample_t * src, int src_step, i } } -inline void calcElement(__global const sample_t * src, int src_step, int src_offset, +inline void calcElement(__global const uchar * src, int src_step, int src_offset, __local int * dists, int y, int x0, int x, int id, int first, __global int * col_dists, __global int * up_col_dists) { @@ -207,9 +207,9 @@ inline void calcElement(__global const sample_t * src, int src_step, int src_off } } -inline void convolveWindow(__global const sample_t * src, int src_step, int src_offset, +inline void convolveWindow(__global const uchar * src, int src_step, int src_offset, __local int * dists, __global const int * almostDist2Weight, - __global sample_t * dst, int dst_step, int dst_offset, + __global uchar * dst, int dst_step, int dst_offset, int y, int x, int id, __local weight_t * weights_local, __local sum_t * weighted_sum_local, int almostTemplateWindowSizeSqBinShift) { @@ -255,9 +255,9 @@ inline void convolveWindow(__global const sample_t * src, int src_step, int src_ } } -__kernel void fastNlMeansDenoising(__global const sample_t * src, int src_step, int src_offset, - __global sample_t * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, - __global const int * almostDist2Weight, __global sample_t * buffer, +__kernel void fastNlMeansDenoising(__global const uchar * src, int src_step, int src_offset, + __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, + __global const int * almostDist2Weight, __global uchar * buffer, int almostTemplateWindowSizeSqBinShift) { int block_x = get_group_id(0), nblocks_x = get_num_groups(0); @@ -277,11 +277,6 @@ __kernel void fastNlMeansDenoising(__global const sample_t * 
src, int src_step, __global int * col_dists = (__global int *)(buffer + block_data_start * sizeof(int)); __global int * up_col_dists = col_dists + SEARCH_SIZE_SQ * TEMPLATE_SIZE; - src_step /= sizeof(sample_t); - src_offset /= sizeof(sample_t); - dst_step /= sizeof(sample_t); - dst_offset /= sizeof(sample_t); - for (int y = y0; y < y1; ++y) for (int x = x0; x < x1; ++x) { From 305cff36e2c5a334821bd6e40eddd16ba304e6fe Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Thu, 5 Mar 2015 13:36:42 +0100 Subject: [PATCH 22/40] Changed from IT to int for distance calculation --- .../src/fast_nlmeans_denoising_invoker.hpp | 50 +++---- ...fast_nlmeans_denoising_invoker_commons.hpp | 124 +++++++++--------- .../fast_nlmeans_multi_denoising_invoker.hpp | 54 ++++---- 3 files changed, 114 insertions(+), 114 deletions(-) diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp index 01588b03d..2ebf76af4 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp @@ -80,15 +80,15 @@ private: std::vector almost_dist2weight_; void calcDistSumsForFirstElementInRow( - int i, Array2d& dist_sums, - Array3d& col_dist_sums, - Array3d& up_col_dist_sums) const; + int i, Array2d& dist_sums, + Array3d& col_dist_sums, + Array3d& up_col_dist_sums) const; void calcDistSumsForElementInFirstRow( int i, int j, int first_col_num, - Array2d& dist_sums, - Array3d& col_dist_sums, - Array3d& up_col_dist_sums) const; + Array2d& dist_sums, + Array3d& col_dist_sums, + Array3d& up_col_dist_sums) const; }; inline int getNearestPowerOf2(int value) @@ -128,8 +128,8 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq); double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq; - IT max_dist = D::template maxDist(); - 
size_t almost_max_dist = (size_t)(max_dist / almost_dist2actual_dist_multiplier + 1); + int max_dist = D::template maxDist(); + int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1); almost_dist2weight_.resize(almost_max_dist); const double WEIGHT_THRESHOLD = 0.001; @@ -156,14 +156,14 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) int row_to = range.end - 1; // sums of cols anf rows for current pixel p - Array2d dist_sums(search_window_size_, search_window_size_); + Array2d dist_sums(search_window_size_, search_window_size_); // for lazy calc optimization (sum of cols for current pixel) - Array3d col_dist_sums(template_window_size_, search_window_size_, search_window_size_); + Array3d col_dist_sums(template_window_size_, search_window_size_, search_window_size_); int first_col_num = -1; // last elements of column sum (for each element in row) - Array3d up_col_dist_sums(src_.cols, search_window_size_, search_window_size_); + Array3d up_col_dist_sums(src_.cols, search_window_size_, search_window_size_); for (int i = row_from; i <= row_to; i++) { @@ -202,9 +202,9 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) for (int y = 0; y < search_window_size; y++) { - IT * dist_sums_row = dist_sums.row_ptr(y); - IT * col_dist_sums_row = col_dist_sums.row_ptr(first_col_num, y); - IT * up_col_dist_sums_row = up_col_dist_sums.row_ptr(j, y); + int * dist_sums_row = dist_sums.row_ptr(y); + int * col_dist_sums_row = col_dist_sums.row_ptr(first_col_num, y); + int * up_col_dist_sums_row = up_col_dist_sums.row_ptr(j, y); const T * b_up_ptr = extended_src_.ptr(start_by - template_window_half_size_ - 1 + y); const T * b_down_ptr = extended_src_.ptr(start_by + template_window_half_size_ + y); @@ -215,7 +215,7 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) dist_sums_row[x] -= col_dist_sums_row[x]; int bx = start_bx + x; - col_dist_sums_row[x] = up_col_dist_sums_row[x] + D::template calcUpDownDist(a_up, 
a_down, b_up_ptr[bx], b_down_ptr[bx]); + col_dist_sums_row[x] = up_col_dist_sums_row[x] + D::template calcUpDownDist(a_up, a_down, b_up_ptr[bx], b_down_ptr[bx]); dist_sums_row[x] += col_dist_sums_row[x]; up_col_dist_sums_row[x] = col_dist_sums_row[x]; @@ -234,10 +234,10 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) for (int y = 0; y < search_window_size_; y++) { const T* cur_row_ptr = extended_src_.ptr(border_size_ + search_window_y + y); - IT* dist_sums_row = dist_sums.row_ptr(y); + int* dist_sums_row = dist_sums.row_ptr(y); for (int x = 0; x < search_window_size_; x++) { - size_t almostAvgDist = (size_t)(dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_); + int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_; IT weight = almost_dist2weight_[almostAvgDist]; weights_sum += weight; @@ -257,9 +257,9 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) template inline void FastNlMeansDenoisingInvoker::calcDistSumsForFirstElementInRow( int i, - Array2d& dist_sums, - Array3d& col_dist_sums, - Array3d& up_col_dist_sums) const + Array2d& dist_sums, + Array3d& col_dist_sums, + Array3d& up_col_dist_sums) const { int j = 0; @@ -276,7 +276,7 @@ inline void FastNlMeansDenoisingInvoker::calcDistSumsForFirstElem for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) for (int tx = -template_window_half_size_; tx <= template_window_half_size_; tx++) { - int dist = D::template calcDist(extended_src_, + int dist = D::template calcDist(extended_src_, border_size_ + i + ty, border_size_ + j + tx, border_size_ + start_y + ty, border_size_ + start_x + tx); @@ -291,9 +291,9 @@ inline void FastNlMeansDenoisingInvoker::calcDistSumsForFirstElem template inline void FastNlMeansDenoisingInvoker::calcDistSumsForElementInFirstRow( int i, int j, int first_col_num, - Array2d& dist_sums, - Array3d& col_dist_sums, - Array3d& up_col_dist_sums) const + Array2d& dist_sums, + Array3d& 
col_dist_sums, + Array3d& up_col_dist_sums) const { int ay = border_size_ + i; int ax = border_size_ + j + template_window_half_size_; @@ -312,7 +312,7 @@ inline void FastNlMeansDenoisingInvoker::calcDistSumsForElementIn int by = start_by + y; int bx = start_bx + x; for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) - col_dist_sums[new_last_col_num][y][x] += D::template calcDist(extended_src_, ay + ty, ax, by + ty, bx); + col_dist_sums[new_last_col_num][y][x] += D::template calcDist(extended_src_, ay + ty, ax, by + ty, bx); dist_sums[y][x] += col_dist_sums[new_last_col_num][y][x]; up_col_dist_sums[j][y][x] = col_dist_sums[new_last_col_num][y][x]; diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp index d77ca3e1f..dbb4c5eb3 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp @@ -83,63 +83,63 @@ template struct pixelInfo: public pixelInfo_ class DistAbs { - template struct calcDist_ + template struct calcDist_ { - static inline IT f(const T a, const T b) + static inline int f(const T a, const T b) { - return std::abs((IT)(a-b)); + return std::abs((int)(a-b)); } }; - template struct calcDist_, IT> + template struct calcDist_ > { - static inline IT f(const Vec a, const Vec b) + static inline int f(const Vec a, const Vec b) { - return std::abs((IT)(a[0]-b[0])) + std::abs((IT)(a[1]-b[1])); + return std::abs((int)(a[0]-b[0])) + std::abs((int)(a[1]-b[1])); } }; - template struct calcDist_, IT> + template struct calcDist_ > { - static inline IT f(const Vec a, const Vec b) + static inline int f(const Vec a, const Vec b) { return - std::abs((IT)(a[0]-b[0])) + - std::abs((IT)(a[1]-b[1])) + - std::abs((IT)(a[2]-b[2])); + std::abs((int)(a[0]-b[0])) + + std::abs((int)(a[1]-b[1])) + + std::abs((int)(a[2]-b[2])); } }; - template struct calcDist_, IT> + template struct calcDist_ 
> { - static inline IT f(const Vec a, const Vec b) + static inline int f(const Vec a, const Vec b) { return - std::abs((IT)(a[0]-b[0])) + - std::abs((IT)(a[1]-b[1])) + - std::abs((IT)(a[2]-b[2])) + - std::abs((IT)(a[3]-b[3])); + std::abs((int)(a[0]-b[0])) + + std::abs((int)(a[1]-b[1])) + + std::abs((int)(a[2]-b[2])) + + std::abs((int)(a[3]-b[3])); } }; public: - template static inline IT calcDist(const T a, const T b) + template static inline int calcDist(const T a, const T b) { - return calcDist_::f(a, b); + return calcDist_::f(a, b); } - template - static inline IT calcDist(const Mat& m, int i1, int j1, int i2, int j2) + template + static inline int calcDist(const Mat& m, int i1, int j1, int i2, int j2) { const T a = m.at(i1, j1); const T b = m.at(i2, j2); - return calcDist(a,b); + return calcDist(a,b); } - template - static inline IT calcUpDownDist(T a_up, T a_down, T b_up, T b_down) + template + static inline int calcUpDownDist(T a_up, T a_down, T b_up, T b_down) { - return calcDist(a_down, b_down) - calcDist(a_up, b_up); + return calcDist(a_down, b_down) - calcDist(a_up, b_up); }; template @@ -148,93 +148,93 @@ public: return std::exp(-dist*dist / (h * h * pixelInfo::channels)); } - template + template static double maxDist() { - return (IT)pixelInfo::sampleMax() * (IT)pixelInfo::channels; + return (int)pixelInfo::sampleMax() * pixelInfo::channels; } }; class DistSquared { - template struct calcDist_ + template struct calcDist_ { - static inline IT f(const T a, const T b) + static inline int f(const T a, const T b) { - return (IT)(a-b) * (IT)(a-b); + return (int)(a-b) * (int)(a-b); } }; - template struct calcDist_, IT> + template struct calcDist_ > { - static inline IT f(const Vec a, const Vec b) + static inline int f(const Vec a, const Vec b) { - return (IT)(a[0]-b[0])*(IT)(a[0]-b[0]) + (IT)(a[1]-b[1])*(IT)(a[1]-b[1]); + return (int)(a[0]-b[0])*(int)(a[0]-b[0]) + (int)(a[1]-b[1])*(int)(a[1]-b[1]); } }; - template struct calcDist_, IT> + template struct 
calcDist_ > { - static inline IT f(const Vec a, const Vec b) + static inline int f(const Vec a, const Vec b) { return - (IT)(a[0]-b[0])*(IT)(a[0]-b[0]) + - (IT)(a[1]-b[1])*(IT)(a[1]-b[1]) + - (IT)(a[2]-b[2])*(IT)(a[2]-b[2]); + (int)(a[0]-b[0])*(int)(a[0]-b[0]) + + (int)(a[1]-b[1])*(int)(a[1]-b[1]) + + (int)(a[2]-b[2])*(int)(a[2]-b[2]); } }; - template struct calcDist_, IT> + template struct calcDist_ > { - static inline IT f(const Vec a, const Vec b) + static inline int f(const Vec a, const Vec b) { return - (IT)(a[0]-b[0])*(IT)(a[0]-b[0]) + - (IT)(a[1]-b[1])*(IT)(a[1]-b[1]) + - (IT)(a[2]-b[2])*(IT)(a[2]-b[2]) + - (IT)(a[3]-b[3])*(IT)(a[3]-b[3]); + (int)(a[0]-b[0])*(int)(a[0]-b[0]) + + (int)(a[1]-b[1])*(int)(a[1]-b[1]) + + (int)(a[2]-b[2])*(int)(a[2]-b[2]) + + (int)(a[3]-b[3])*(int)(a[3]-b[3]); } }; - template struct calcUpDownDist_ + template struct calcUpDownDist_ { - static inline IT f(T a_up, T a_down, T b_up, T b_down) + static inline int f(T a_up, T a_down, T b_up, T b_down) { - IT A = a_down - b_down; - IT B = a_up - b_up; + int A = a_down - b_down; + int B = a_up - b_up; return (A-B)*(A+B); } }; - template struct calcUpDownDist_, IT> + template struct calcUpDownDist_ > { private: typedef Vec T; public: - static inline IT f(T a_up, T a_down, T b_up, T b_down) + static inline int f(T a_up, T a_down, T b_up, T b_down) { - return calcDist(a_down, b_down) - calcDist(a_up, b_up); + return calcDist(a_down, b_down) - calcDist(a_up, b_up); } }; public: - template static inline IT calcDist(const T a, const T b) + template static inline int calcDist(const T a, const T b) { - return calcDist_::f(a, b); + return calcDist_::f(a, b); } - template - static inline IT calcDist(const Mat& m, int i1, int j1, int i2, int j2) + template + static inline int calcDist(const Mat& m, int i1, int j1, int i2, int j2) { const T a = m.at(i1, j1); const T b = m.at(i2, j2); - return calcDist(a,b); + return calcDist(a,b); } - template - static inline IT calcUpDownDist(T a_up, T a_down, T 
b_up, T b_down) + template + static inline int calcUpDownDist(T a_up, T a_down, T b_up, T b_down) { - return calcUpDownDist_::f(a_up, a_down, b_up, b_down); + return calcUpDownDist_::f(a_up, a_down, b_up, b_down); }; template @@ -243,11 +243,11 @@ public: return std::exp(-dist / (h * h * pixelInfo::channels)); } - template + template static double maxDist() { - return (IT)pixelInfo::sampleMax() * (IT)pixelInfo::sampleMax() * - (IT)pixelInfo::channels; + return (int)pixelInfo::sampleMax() * (int)pixelInfo::sampleMax() * + pixelInfo::channels; } }; diff --git a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp index eb2078643..f1a334040 100644 --- a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp @@ -85,13 +85,13 @@ private: int almost_template_window_size_sq_bin_shift; std::vector almost_dist2weight; - void calcDistSumsForFirstElementInRow(int i, Array3d& dist_sums, - Array4d& col_dist_sums, - Array4d& up_col_dist_sums) const; + void calcDistSumsForFirstElementInRow(int i, Array3d& dist_sums, + Array4d& col_dist_sums, + Array4d& up_col_dist_sums) const; void calcDistSumsForElementInFirstRow(int i, int j, int first_col_num, - Array3d& dist_sums, Array4d& col_dist_sums, - Array4d& up_col_dist_sums) const; + Array3d& dist_sums, Array4d& col_dist_sums, + Array4d& up_col_dist_sums) const; }; template @@ -139,8 +139,8 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoke int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift; double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq; - IT max_dist = D::template maxDist(); - int almost_max_dist = (int) (max_dist / almost_dist2actual_dist_multiplier + 1); + int max_dist = D::template maxDist(); + int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1); 
almost_dist2weight.resize(almost_max_dist); const double WEIGHT_THRESHOLD = 0.001; @@ -166,13 +166,13 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& r int row_from = range.start; int row_to = range.end - 1; - Array3d dist_sums(temporal_window_size_, search_window_size_, search_window_size_); + Array3d dist_sums(temporal_window_size_, search_window_size_, search_window_size_); // for lazy calc optimization - Array4d col_dist_sums(template_window_size_, temporal_window_size_, search_window_size_, search_window_size_); + Array4d col_dist_sums(template_window_size_, temporal_window_size_, search_window_size_, search_window_size_); int first_col_num = -1; - Array4d up_col_dist_sums(cols_, temporal_window_size_, search_window_size_, search_window_size_); + Array4d up_col_dist_sums(cols_, temporal_window_size_, search_window_size_, search_window_size_); for (int i = row_from; i <= row_to; i++) { @@ -216,15 +216,15 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& r for (int d = 0; d < temporal_window_size_; d++) { Mat cur_extended_src = extended_srcs_[d]; - Array2d cur_dist_sums = dist_sums[d]; - Array2d cur_col_dist_sums = col_dist_sums[first_col_num][d]; - Array2d cur_up_col_dist_sums = up_col_dist_sums[j][d]; + Array2d cur_dist_sums = dist_sums[d]; + Array2d cur_col_dist_sums = col_dist_sums[first_col_num][d]; + Array2d cur_up_col_dist_sums = up_col_dist_sums[j][d]; for (int y = 0; y < search_window_size; y++) { - IT* dist_sums_row = cur_dist_sums.row_ptr(y); + int* dist_sums_row = cur_dist_sums.row_ptr(y); - IT* col_dist_sums_row = cur_col_dist_sums.row_ptr(y); - IT* up_col_dist_sums_row = cur_up_col_dist_sums.row_ptr(y); + int* col_dist_sums_row = cur_col_dist_sums.row_ptr(y); + int* up_col_dist_sums_row = cur_up_col_dist_sums.row_ptr(y); const T* b_up_ptr = cur_extended_src.ptr(start_by - template_window_half_size_ - 1 + y); const T* b_down_ptr = cur_extended_src.ptr(start_by + template_window_half_size_ + y); @@ -234,7 +234,7 @@ 
void FastNlMeansMultiDenoisingInvoker::operator() (const Range& r dist_sums_row[x] -= col_dist_sums_row[x]; col_dist_sums_row[x] = up_col_dist_sums_row[x] + - D::template calcUpDownDist(a_up, a_down, b_up_ptr[start_bx + x], b_down_ptr[start_bx + x]); + D::template calcUpDownDist(a_up, a_down, b_up_ptr[start_bx + x], b_down_ptr[start_bx + x]); dist_sums_row[x] += col_dist_sums_row[x]; up_col_dist_sums_row[x] = col_dist_sums_row[x]; @@ -260,11 +260,11 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& r { const T* cur_row_ptr = esrc_d.ptr(border_size_ + search_window_y + y); - IT* dist_sums_row = dist_sums.row_ptr(d, y); + int* dist_sums_row = dist_sums.row_ptr(d, y); for (int x = 0; x < search_window_size_; x++) { - size_t almostAvgDist = (size_t)(dist_sums_row[x] >> almost_template_window_size_sq_bin_shift); + int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift; IT weight = almost_dist2weight[almostAvgDist]; weights_sum += weight; @@ -286,7 +286,7 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& r template inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForFirstElementInRow( - int i, Array3d& dist_sums, Array4d& col_dist_sums, Array4d& up_col_dist_sums) const + int i, Array3d& dist_sums, Array4d& col_dist_sums, Array4d& up_col_dist_sums) const { int j = 0; @@ -303,14 +303,14 @@ inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForFirs int start_y = i + y - search_window_half_size_; int start_x = j + x - search_window_half_size_; - IT* dist_sums_ptr = &dist_sums[d][y][x]; - IT* col_dist_sums_ptr = &col_dist_sums[0][d][y][x]; + int* dist_sums_ptr = &dist_sums[d][y][x]; + int* col_dist_sums_ptr = &col_dist_sums[0][d][y][x]; int col_dist_sums_step = col_dist_sums.step_size(0); for (int tx = -template_window_half_size_; tx <= template_window_half_size_; tx++) { for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) { - IT dist = D::template calcDist( + int dist = 
D::template calcDist( main_extended_src_.at(border_size_ + i + ty, border_size_ + j + tx), cur_extended_src.at(border_size_ + start_y + ty, border_size_ + start_x + tx)); @@ -327,8 +327,8 @@ inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForFirs template inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForElementInFirstRow( - int i, int j, int first_col_num, Array3d& dist_sums, - Array4d& col_dist_sums, Array4d& up_col_dist_sums) const + int i, int j, int first_col_num, Array3d& dist_sums, + Array4d& col_dist_sums, Array4d& up_col_dist_sums) const { int ay = border_size_ + i; int ax = border_size_ + j + template_window_half_size_; @@ -350,10 +350,10 @@ inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForElem int by = start_by + y; int bx = start_bx + x; - IT* col_dist_sums_ptr = &col_dist_sums[new_last_col_num][d][y][x]; + int* col_dist_sums_ptr = &col_dist_sums[new_last_col_num][d][y][x]; for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) { - *col_dist_sums_ptr += D::template calcDist( + *col_dist_sums_ptr += D::template calcDist( main_extended_src_.at(ay + ty, ax), cur_extended_src.at(by + ty, bx)); } From 18be52c05b5d3167c937976c146a392675c828fc Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Thu, 5 Mar 2015 13:55:06 +0100 Subject: [PATCH 23/40] Changed LUTs from IT to int --- .../src/fast_nlmeans_denoising_invoker.hpp | 13 +++++---- ...fast_nlmeans_denoising_invoker_commons.hpp | 28 +++++++++---------- .../fast_nlmeans_multi_denoising_invoker.hpp | 13 +++++---- 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp index 2ebf76af4..ec154fbe6 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp @@ -75,9 +75,9 @@ private: int template_window_half_size_; int search_window_half_size_; - IT fixed_point_mult_; + int 
fixed_point_mult_; int almost_template_window_size_sq_bin_shift_; - std::vector almost_dist2weight_; + std::vector almost_dist2weight_; void calcDistSumsForFirstElementInRow( int i, Array2d& dist_sums, @@ -119,7 +119,8 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( const IT max_estimate_sum_value = (IT)search_window_size_ * (IT)search_window_size_ * (IT)pixelInfo::sampleMax(); - fixed_point_mult_ = std::numeric_limits::max() / max_estimate_sum_value; + fixed_point_mult_ = (int)std::min(std::numeric_limits::max() / max_estimate_sum_value, + std::numeric_limits::max()); // precalc weight for every possible l2 dist between blocks // additional optimization of precalced weights to replace division(averaging) by binary shift @@ -136,7 +137,7 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) { double dist = almost_dist * almost_dist2actual_dist_multiplier; - IT weight = (IT)round(fixed_point_mult_ * D::template calcWeight(dist, h)); + int weight = (int)round(fixed_point_mult_ * D::template calcWeight(dist, h)); if (weight < WEIGHT_THRESHOLD * fixed_point_mult_) weight = 0; @@ -238,8 +239,8 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) for (int x = 0; x < search_window_size_; x++) { int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_; - IT weight = almost_dist2weight_[almostAvgDist]; - weights_sum += weight; + int weight = almost_dist2weight_[almostAvgDist]; + weights_sum += (IT)weight; T p = cur_row_ptr[border_size_ + search_window_x + x]; incWithWeight(estimation, weight, p); diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp index dbb4c5eb3..4d66efe46 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp @@ -253,39 +253,39 @@ public: template struct 
incWithWeight_ { - static inline void f(IT* estimation, IT weight, T p) + static inline void f(IT* estimation, int weight, T p) { - estimation[0] += weight * p; + estimation[0] += (IT)weight * p; } }; template struct incWithWeight_, IT> { - static inline void f(IT* estimation, IT weight, Vec p) + static inline void f(IT* estimation, int weight, Vec p) { - estimation[0] += weight * p[0]; - estimation[1] += weight * p[1]; + estimation[0] += (IT)weight * p[0]; + estimation[1] += (IT)weight * p[1]; } }; template struct incWithWeight_, IT> { - static inline void f(IT* estimation, IT weight, Vec p) + static inline void f(IT* estimation, int weight, Vec p) { - estimation[0] += weight * p[0]; - estimation[1] += weight * p[1]; - estimation[2] += weight * p[2]; + estimation[0] += (IT)weight * p[0]; + estimation[1] += (IT)weight * p[1]; + estimation[2] += (IT)weight * p[2]; } }; template struct incWithWeight_, IT> { - static inline void f(IT* estimation, IT weight, Vec p) + static inline void f(IT* estimation, int weight, Vec p) { - estimation[0] += weight * p[0]; - estimation[1] += weight * p[1]; - estimation[2] += weight * p[2]; - estimation[3] += weight * p[3]; + estimation[0] += (IT)weight * p[0]; + estimation[1] += (IT)weight * p[1]; + estimation[2] += (IT)weight * p[2]; + estimation[3] += (IT)weight * p[3]; } }; diff --git a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp index f1a334040..f9c1264b2 100644 --- a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp @@ -81,9 +81,9 @@ private: int search_window_half_size_; int temporal_window_half_size_; - IT fixed_point_mult_; + int fixed_point_mult_; int almost_template_window_size_sq_bin_shift; - std::vector almost_dist2weight; + std::vector almost_dist2weight; void calcDistSumsForFirstElementInRow(int i, Array3d& dist_sums, Array4d& col_dist_sums, @@ -127,7 +127,8 @@ 
FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoke main_extended_src_ = extended_srcs_[temporal_window_half_size_]; const IT max_estimate_sum_value = (IT)temporal_window_size_ * (IT)search_window_size_ * (IT)search_window_size_ * (IT)pixelInfo::sampleMax(); - fixed_point_mult_ = std::numeric_limits::max() / max_estimate_sum_value; + fixed_point_mult_ = (int)std::min(std::numeric_limits::max() / max_estimate_sum_value, + std::numeric_limits::max()); // precalc weight for every possible l2 dist between blocks // additional optimization of precalced weights to replace division(averaging) by binary shift @@ -147,7 +148,7 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoke for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) { double dist = almost_dist * almost_dist2actual_dist_multiplier; - IT weight = (IT)round(fixed_point_mult_ * D::template calcWeight(dist, h)); + int weight = (int)round(fixed_point_mult_ * D::template calcWeight(dist, h)); if (weight < WEIGHT_THRESHOLD * fixed_point_mult_) weight = 0; @@ -266,8 +267,8 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& r { int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift; - IT weight = almost_dist2weight[almostAvgDist]; - weights_sum += weight; + int weight = almost_dist2weight[almostAvgDist]; + weights_sum += (IT)weight; T p = cur_row_ptr[border_size_ + search_window_x + x]; incWithWeight(estimation, weight, p); From c41efe4e303d51bf207bb54f60d2f4508acfe53d Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Thu, 5 Mar 2015 17:50:52 +0100 Subject: [PATCH 24/40] Refactoring in preparation for per-channel h-values --- modules/photo/src/denoising.cpp | 96 ++++++------ .../src/fast_nlmeans_denoising_invoker.hpp | 45 +++--- ...fast_nlmeans_denoising_invoker_commons.hpp | 147 +++++++++++++++--- .../fast_nlmeans_multi_denoising_invoker.hpp | 48 +++--- 4 files changed, 216 insertions(+), 120 deletions(-) diff --git 
a/modules/photo/src/denoising.cpp b/modules/photo/src/denoising.cpp index b41f83ec9..29899f791 100644 --- a/modules/photo/src/denoising.cpp +++ b/modules/photo/src/denoising.cpp @@ -65,23 +65,23 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h, switch (src.type()) { case CV_8U: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, h)); + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, &h)); break; case CV_8UC2: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, h)); + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, &h)); break; case CV_8UC3: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, h)); + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, &h)); break; case CV_8UC4: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, h)); + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, &h)); break; default: CV_Error(Error::StsBadArg, @@ -104,43 +104,43 @@ void cv::fastNlMeansDenoisingAbs( InputArray _src, OutputArray _dst, float h, switch (src.type()) { case CV_8U: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, h)); + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, &h)); break; case CV_8UC2: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, h)); + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, &h)); break; case CV_8UC3: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, h)); + 
FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, &h)); break; case CV_8UC4: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, h)); + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, &h)); break; case CV_16U: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, h)); + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, &h)); break; case CV_16UC2: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker, int64, uint64, DistAbs>( - src, dst, templateWindowSize, searchWindowSize, h)); + FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, int>( + src, dst, templateWindowSize, searchWindowSize, &h)); break; case CV_16UC3: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker, int64, uint64, DistAbs>( - src, dst, templateWindowSize, searchWindowSize, h)); + FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, int>( + src, dst, templateWindowSize, searchWindowSize, &h)); break; case CV_16UC4: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker, int64, uint64, DistAbs>( - src, dst, templateWindowSize, searchWindowSize, h)); + FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, int>( + src, dst, templateWindowSize, searchWindowSize, &h)); break; default: CV_Error(Error::StsBadArg, @@ -239,27 +239,27 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds { case CV_8U: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + dst, templateWindowSize, searchWindowSize, &h)); break; case CV_8UC2: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, 
imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + dst, templateWindowSize, searchWindowSize, &h)); break; case CV_8UC3: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + dst, templateWindowSize, searchWindowSize, &h)); break; case CV_8UC4: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + dst, templateWindowSize, searchWindowSize, &h)); break; default: CV_Error(Error::StsBadArg, @@ -285,51 +285,51 @@ void cv::fastNlMeansDenoisingMultiAbs( InputArrayOfArrays _srcImgs, OutputArray { case CV_8U: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + dst, templateWindowSize, searchWindowSize, &h)); break; case CV_8UC2: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + dst, templateWindowSize, searchWindowSize, &h)); break; case CV_8UC3: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + dst, templateWindowSize, searchWindowSize, &h)); break; case CV_8UC4: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + dst, templateWindowSize, searchWindowSize, &h)); 
break; case CV_16U: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + dst, templateWindowSize, searchWindowSize, &h)); break; case CV_16UC2: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs>( + FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, int>( srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + dst, templateWindowSize, searchWindowSize, &h)); break; case CV_16UC3: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs>( + FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, int>( srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + dst, templateWindowSize, searchWindowSize, &h)); break; case CV_16UC4: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs>( + FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, int>( srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + dst, templateWindowSize, searchWindowSize, &h)); break; default: CV_Error(Error::StsBadArg, diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp index ec154fbe6..9dea2a02f 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp @@ -50,13 +50,13 @@ using namespace cv; -template +template struct FastNlMeansDenoisingInvoker : public ParallelLoopBody { public: FastNlMeansDenoisingInvoker(const Mat& src, Mat& dst, - int template_window_size, int search_window_size, const float h); + int template_window_size, int search_window_size, const float *h); void operator() (const Range& range) const; @@ 
-77,7 +77,7 @@ private: int fixed_point_mult_; int almost_template_window_size_sq_bin_shift_; - std::vector almost_dist2weight_; + std::vector almost_dist2weight_; void calcDistSumsForFirstElementInRow( int i, Array2d& dist_sums, @@ -99,12 +99,12 @@ inline int getNearestPowerOf2(int value) return p; } -template -FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( +template +FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( const Mat& src, Mat& dst, int template_window_size, int search_window_size, - const float h) : + const float *h) : src_(src), dst_(dst) { CV_Assert(src.channels() == pixelInfo::channels); @@ -133,25 +133,20 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1); almost_dist2weight_.resize(almost_max_dist); - const double WEIGHT_THRESHOLD = 0.001; for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) { double dist = almost_dist * almost_dist2actual_dist_multiplier; - int weight = (int)round(fixed_point_mult_ * D::template calcWeight(dist, h)); - if (weight < WEIGHT_THRESHOLD * fixed_point_mult_) - weight = 0; - - almost_dist2weight_[almost_dist] = weight; + almost_dist2weight_[almost_dist] = + D::template calcWeight(dist, h, fixed_point_mult_); } - CV_Assert(almost_dist2weight_[0] == fixed_point_mult_); // additional optimization init end if (dst_.empty()) dst_ = Mat::zeros(src_.size(), src_.type()); } -template -void FastNlMeansDenoisingInvoker::operator() (const Range& range) const +template +void FastNlMeansDenoisingInvoker::operator() (const Range& range) const { int row_from = range.start; int row_to = range.end - 1; @@ -228,9 +223,9 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) } // calc weights - IT estimation[pixelInfo::channels], weights_sum = 0; + IT estimation[pixelInfo::channels], weights_sum[pixelInfo::channels]; for (size_t channel_num = 0; channel_num < pixelInfo::channels; channel_num++) - 
estimation[channel_num] = 0; + estimation[channel_num] = weights_sum[channel_num] = 0; for (int y = 0; y < search_window_size_; y++) { @@ -240,23 +235,23 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) { int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_; int weight = almost_dist2weight_[almostAvgDist]; - weights_sum += (IT)weight; - T p = cur_row_ptr[border_size_ + search_window_x + x]; - incWithWeight(estimation, weight, p); + incWithWeight(estimation, weights_sum, weight, p); } } for (size_t channel_num = 0; channel_num < pixelInfo::channels; channel_num++) - estimation[channel_num] = (static_cast(estimation[channel_num]) + weights_sum/2) / weights_sum; + estimation[channel_num] = + (static_cast(estimation[channel_num]) + weights_sum[channel_num]/2) / + weights_sum[channel_num]; dst_.at(i,j) = saturateCastFromArray(estimation); } } } -template -inline void FastNlMeansDenoisingInvoker::calcDistSumsForFirstElementInRow( +template +inline void FastNlMeansDenoisingInvoker::calcDistSumsForFirstElementInRow( int i, Array2d& dist_sums, Array3d& col_dist_sums, @@ -289,8 +284,8 @@ inline void FastNlMeansDenoisingInvoker::calcDistSumsForFirstElem } } -template -inline void FastNlMeansDenoisingInvoker::calcDistSumsForElementInFirstRow( +template +inline void FastNlMeansDenoisingInvoker::calcDistSumsForElementInFirstRow( int i, int j, int first_col_num, Array2d& dist_sums, Array3d& col_dist_sums, diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp index 4d66efe46..53a6f5ed6 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp @@ -122,6 +122,36 @@ class DistAbs } }; + static const double WEIGHT_THRESHOLD = 0.001; + template struct calcWeight_ + { + static inline WT f(double dist, const float *h, int fixed_point_mult) + { + WT weight = 
(WT)round(fixed_point_mult * + std::exp(-dist*dist / (h[0]*h[0] * pixelInfo::channels))); + if (weight < WEIGHT_THRESHOLD * fixed_point_mult) + weight = 0; + return weight; + } + }; + + template struct calcWeight_ > + { + static inline Vec f(double dist, const float *h, int fixed_point_mult) + { + Vec res; + for (int i=0; i::channels))); + if (weight < WEIGHT_THRESHOLD * fixed_point_mult) + weight = 0; + res[i] = weight; + } + return res; + } + }; + public: template static inline int calcDist(const T a, const T b) { @@ -142,14 +172,14 @@ public: return calcDist(a_down, b_down) - calcDist(a_up, b_up); }; - template - static double calcWeight(double dist, double h) + template + static inline WT calcWeight(double dist, const float *h, int fixed_point_mult) { - return std::exp(-dist*dist / (h * h * pixelInfo::channels)); + return calcWeight_::f(dist, h, fixed_point_mult); } template - static double maxDist() + static inline double maxDist() { return (int)pixelInfo::sampleMax() * pixelInfo::channels; } @@ -217,6 +247,36 @@ class DistSquared } }; + static const double WEIGHT_THRESHOLD = 0.001; + template struct calcWeight_ + { + static inline WT f(double dist, const float *h, int fixed_point_mult) + { + WT weight = (WT)round(fixed_point_mult * + std::exp(-dist / (h[0]*h[0] * pixelInfo::channels))); + if (weight < WEIGHT_THRESHOLD * fixed_point_mult) + weight = 0; + return weight; + } + }; + + template struct calcWeight_ > + { + static inline Vec f(double dist, const float *h, int fixed_point_mult) + { + Vec res; + for (int i=0; i::channels))); + if (weight < WEIGHT_THRESHOLD * fixed_point_mult) + weight = 0; + res[i] = weight; + } + return res; + } + }; + public: template static inline int calcDist(const T a, const T b) { @@ -237,62 +297,111 @@ public: return calcUpDownDist_::f(a_up, a_down, b_up, b_down); }; - template - static double calcWeight(double dist, double h) + template + static inline WT calcWeight(double dist, const float *h, int fixed_point_mult) { - return 
std::exp(-dist / (h * h * pixelInfo::channels)); + return calcWeight_::f(dist, h, fixed_point_mult); } template - static double maxDist() + static inline double maxDist() { return (int)pixelInfo::sampleMax() * (int)pixelInfo::sampleMax() * pixelInfo::channels; } }; -template struct incWithWeight_ +template struct incWithWeight_ { - static inline void f(IT* estimation, int weight, T p) + static inline void f(IT* estimation, IT* weights_sum, WT weight, T p) { estimation[0] += (IT)weight * p; + weights_sum[0] += (IT)weight; } }; -template struct incWithWeight_, IT> +template struct incWithWeight_, IT, int> { - static inline void f(IT* estimation, int weight, Vec p) + static inline void f(IT* estimation, IT* weights_sum, int weight, Vec p) { estimation[0] += (IT)weight * p[0]; estimation[1] += (IT)weight * p[1]; + weights_sum[0] += (IT)weight; + weights_sum[1] += (IT)weight; } }; -template struct incWithWeight_, IT> +template struct incWithWeight_, IT, int> { - static inline void f(IT* estimation, int weight, Vec p) + static inline void f(IT* estimation, IT* weights_sum, int weight, Vec p) { estimation[0] += (IT)weight * p[0]; estimation[1] += (IT)weight * p[1]; estimation[2] += (IT)weight * p[2]; + weights_sum[0] += (IT)weight; + weights_sum[1] += (IT)weight; + weights_sum[2] += (IT)weight; } }; -template struct incWithWeight_, IT> +template struct incWithWeight_, IT, int> { - static inline void f(IT* estimation, int weight, Vec p) + static inline void f(IT* estimation, IT* weights_sum, int weight, Vec p) { estimation[0] += (IT)weight * p[0]; estimation[1] += (IT)weight * p[1]; estimation[2] += (IT)weight * p[2]; estimation[3] += (IT)weight * p[3]; + weights_sum[0] += (IT)weight; + weights_sum[1] += (IT)weight; + weights_sum[2] += (IT)weight; + weights_sum[3] += (IT)weight; } }; -template -static inline void incWithWeight(IT* estimation, IT weight, T p) +template struct incWithWeight_, IT, Vec > { - return incWithWeight_::f(estimation, weight, p); + static inline void 
f(IT* estimation, IT* weights_sum, Vec weight, Vec p) + { + estimation[0] += (IT)weight[0] * p[0]; + estimation[1] += (IT)weight[1] * p[1]; + weights_sum[0] += (IT)weight[0]; + weights_sum[1] += (IT)weight[1]; + } +}; + +template struct incWithWeight_, IT, Vec > +{ + static inline void f(IT* estimation, IT* weights_sum, Vec weight, Vec p) + { + estimation[0] += (IT)weight[0] * p[0]; + estimation[1] += (IT)weight[1] * p[1]; + estimation[2] += (IT)weight[2] * p[2]; + weights_sum[0] += (IT)weight[0]; + weights_sum[1] += (IT)weight[1]; + weights_sum[2] += (IT)weight[2]; + } +}; + +template struct incWithWeight_, IT, Vec > +{ + static inline void f(IT* estimation, IT* weights_sum, Vec weight, Vec p) + { + estimation[0] += (IT)weight[0] * p[0]; + estimation[1] += (IT)weight[1] * p[1]; + estimation[2] += (IT)weight[2] * p[2]; + estimation[3] += (IT)weight[3] * p[3]; + weights_sum[0] += (IT)weight[0]; + weights_sum[1] += (IT)weight[1]; + weights_sum[2] += (IT)weight[2]; + weights_sum[3] += (IT)weight[3]; + } +}; + +template +static inline void incWithWeight(IT* estimation, IT* weights_sum, IT weight, T p) +{ + return incWithWeight_::f(estimation, weights_sum, weight, p); } template struct saturateCastFromArray_ diff --git a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp index f9c1264b2..489ee673f 100644 --- a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp @@ -50,14 +50,14 @@ using namespace cv; -template +template struct FastNlMeansMultiDenoisingInvoker : ParallelLoopBody { public: FastNlMeansMultiDenoisingInvoker(const std::vector& srcImgs, int imgToDenoiseIndex, int temporalWindowSize, Mat& dst, int template_window_size, - int search_window_size, const float h); + int search_window_size, const float *h); void operator() (const Range& range) const; @@ -83,7 +83,7 @@ private: int fixed_point_mult_; int 
almost_template_window_size_sq_bin_shift; - std::vector almost_dist2weight; + std::vector almost_dist2weight; void calcDistSumsForFirstElementInRow(int i, Array3d& dist_sums, Array4d& col_dist_sums, @@ -94,15 +94,15 @@ private: Array4d& up_col_dist_sums) const; }; -template -FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( +template +FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( const std::vector& srcImgs, int imgToDenoiseIndex, int temporalWindowSize, cv::Mat& dst, int template_window_size, int search_window_size, - const float h) : + const float *h) : dst_(dst), extended_srcs_(srcImgs.size()) { CV_Assert(srcImgs.size() > 0); @@ -144,25 +144,20 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoke int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1); almost_dist2weight.resize(almost_max_dist); - const double WEIGHT_THRESHOLD = 0.001; for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) { double dist = almost_dist * almost_dist2actual_dist_multiplier; - int weight = (int)round(fixed_point_mult_ * D::template calcWeight(dist, h)); - if (weight < WEIGHT_THRESHOLD * fixed_point_mult_) - weight = 0; - - almost_dist2weight[almost_dist] = weight; + almost_dist2weight[almost_dist] = + D::template calcWeight(dist, h, fixed_point_mult_); } - CV_Assert(almost_dist2weight[0] == fixed_point_mult_); // additional optimization init end if (dst_.empty()) dst_ = Mat::zeros(srcImgs[0].size(), srcImgs[0].type()); } -template -void FastNlMeansMultiDenoisingInvoker::operator() (const Range& range) const +template +void FastNlMeansMultiDenoisingInvoker::operator() (const Range& range) const { int row_from = range.start; int row_to = range.end - 1; @@ -248,11 +243,9 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& r } // calc weights - IT weights_sum = 0; - - IT estimation[pixelInfo::channels]; + IT estimation[pixelInfo::channels], weights_sum[pixelInfo::channels]; for 
(size_t channel_num = 0; channel_num < pixelInfo::channels; channel_num++) - estimation[channel_num] = 0; + estimation[channel_num] = weights_sum[channel_num] = 0; for (int d = 0; d < temporal_window_size_; d++) { @@ -268,25 +261,24 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& r int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift; int weight = almost_dist2weight[almostAvgDist]; - weights_sum += (IT)weight; - T p = cur_row_ptr[border_size_ + search_window_x + x]; - incWithWeight(estimation, weight, p); + incWithWeight(estimation, weights_sum, weight, p); } } } for (size_t channel_num = 0; channel_num < pixelInfo::channels; channel_num++) - estimation[channel_num] = (static_cast(estimation[channel_num]) + weights_sum / 2) / weights_sum; + estimation[channel_num] = + (static_cast(estimation[channel_num]) + weights_sum[channel_num] / 2) / + weights_sum[channel_num]; dst_.at(i,j) = saturateCastFromArray(estimation); - } } } -template -inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForFirstElementInRow( +template +inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForFirstElementInRow( int i, Array3d& dist_sums, Array4d& col_dist_sums, Array4d& up_col_dist_sums) const { int j = 0; @@ -326,8 +318,8 @@ inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForFirs } } -template -inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForElementInFirstRow( +template +inline void FastNlMeansMultiDenoisingInvoker::calcDistSumsForElementInFirstRow( int i, int j, int first_col_num, Array3d& dist_sums, Array4d& col_dist_sums, Array4d& up_col_dist_sums) const { From 1e82a67cc4d082abe9437dd163314a543bd90232 Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Fri, 6 Mar 2015 14:28:43 +0100 Subject: [PATCH 25/40] Additional refactoring --- .../src/fast_nlmeans_denoising_invoker.hpp | 17 ++-- ...fast_nlmeans_denoising_invoker_commons.hpp | 91 +++++++++++-------- .../fast_nlmeans_multi_denoising_invoker.hpp | 17 
++-- 3 files changed, 71 insertions(+), 54 deletions(-) diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp index 9dea2a02f..ff35550df 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp @@ -75,7 +75,7 @@ private: int template_window_half_size_; int search_window_half_size_; - int fixed_point_mult_; + typename pixelInfo::sampleType fixed_point_mult_; int almost_template_window_size_sq_bin_shift_; std::vector almost_dist2weight_; @@ -120,7 +120,7 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( const IT max_estimate_sum_value = (IT)search_window_size_ * (IT)search_window_size_ * (IT)pixelInfo::sampleMax(); fixed_point_mult_ = (int)std::min(std::numeric_limits::max() / max_estimate_sum_value, - std::numeric_limits::max()); + pixelInfo::sampleMax()); // precalc weight for every possible l2 dist between blocks // additional optimization of precalced weights to replace division(averaging) by binary shift @@ -223,9 +223,11 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& ra } // calc weights - IT estimation[pixelInfo::channels], weights_sum[pixelInfo::channels]; + IT estimation[pixelInfo::channels], weights_sum[pixelInfo::channels]; for (size_t channel_num = 0; channel_num < pixelInfo::channels; channel_num++) - estimation[channel_num] = weights_sum[channel_num] = 0; + estimation[channel_num] = 0; + for (size_t channel_num = 0; channel_num < pixelInfo::channels; channel_num++) + weights_sum[channel_num] = 0; for (int y = 0; y < search_window_size_; y++) { @@ -240,11 +242,8 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& ra } } - for (size_t channel_num = 0; channel_num < pixelInfo::channels; channel_num++) - estimation[channel_num] = - (static_cast(estimation[channel_num]) + weights_sum[channel_num]/2) / - weights_sum[channel_num]; - + divByWeightsSum::channels, 
pixelInfo::channels>(estimation, + weights_sum); dst_.at(i,j) = saturateCastFromArray(estimation); } } diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp index 53a6f5ed6..df8e4703e 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp @@ -122,11 +122,11 @@ class DistAbs } }; - static const double WEIGHT_THRESHOLD = 0.001; template struct calcWeight_ { - static inline WT f(double dist, const float *h, int fixed_point_mult) + static inline WT f(double dist, const float *h, WT fixed_point_mult) { + static const double WEIGHT_THRESHOLD = 0.001; WT weight = (WT)round(fixed_point_mult * std::exp(-dist*dist / (h[0]*h[0] * pixelInfo::channels))); if (weight < WEIGHT_THRESHOLD * fixed_point_mult) @@ -137,17 +137,11 @@ class DistAbs template struct calcWeight_ > { - static inline Vec f(double dist, const float *h, int fixed_point_mult) + static inline Vec f(double dist, const float *h, ET fixed_point_mult) { Vec res; for (int i=0; i::channels))); - if (weight < WEIGHT_THRESHOLD * fixed_point_mult) - weight = 0; - res[i] = weight; - } + res[i] = calcWeight(dist, &h[i], fixed_point_mult); return res; } }; @@ -247,11 +241,11 @@ class DistSquared } }; - static const double WEIGHT_THRESHOLD = 0.001; template struct calcWeight_ { static inline WT f(double dist, const float *h, int fixed_point_mult) { + static const double WEIGHT_THRESHOLD = 0.001; WT weight = (WT)round(fixed_point_mult * std::exp(-dist / (h[0]*h[0] * pixelInfo::channels))); if (weight < WEIGHT_THRESHOLD * fixed_point_mult) @@ -266,13 +260,7 @@ class DistSquared { Vec res; for (int i=0; i::channels))); - if (weight < WEIGHT_THRESHOLD * fixed_point_mult) - weight = 0; - res[i] = weight; - } + res[i] = calcWeight(dist, &h[i], fixed_point_mult); return res; } }; @@ -320,48 +308,42 @@ template struct incWithWeight_ } }; -template struct 
incWithWeight_, IT, int> +template struct incWithWeight_, IT, WT> { - static inline void f(IT* estimation, IT* weights_sum, int weight, Vec p) + static inline void f(IT* estimation, IT* weights_sum, WT weight, Vec p) { estimation[0] += (IT)weight * p[0]; estimation[1] += (IT)weight * p[1]; weights_sum[0] += (IT)weight; - weights_sum[1] += (IT)weight; } }; -template struct incWithWeight_, IT, int> +template struct incWithWeight_, IT, WT> { - static inline void f(IT* estimation, IT* weights_sum, int weight, Vec p) + static inline void f(IT* estimation, IT* weights_sum, WT weight, Vec p) { estimation[0] += (IT)weight * p[0]; estimation[1] += (IT)weight * p[1]; estimation[2] += (IT)weight * p[2]; weights_sum[0] += (IT)weight; - weights_sum[1] += (IT)weight; - weights_sum[2] += (IT)weight; } }; -template struct incWithWeight_, IT, int> +template struct incWithWeight_, IT, WT> { - static inline void f(IT* estimation, IT* weights_sum, int weight, Vec p) + static inline void f(IT* estimation, IT* weights_sum, WT weight, Vec p) { estimation[0] += (IT)weight * p[0]; estimation[1] += (IT)weight * p[1]; estimation[2] += (IT)weight * p[2]; estimation[3] += (IT)weight * p[3]; weights_sum[0] += (IT)weight; - weights_sum[1] += (IT)weight; - weights_sum[2] += (IT)weight; - weights_sum[3] += (IT)weight; } }; -template struct incWithWeight_, IT, Vec > +template struct incWithWeight_, IT, Vec > { - static inline void f(IT* estimation, IT* weights_sum, Vec weight, Vec p) + static inline void f(IT* estimation, IT* weights_sum, Vec weight, Vec p) { estimation[0] += (IT)weight[0] * p[0]; estimation[1] += (IT)weight[1] * p[1]; @@ -370,9 +352,9 @@ template struct incWithWeight_, IT, Vec struct incWithWeight_, IT, Vec > +template struct incWithWeight_, IT, Vec > { - static inline void f(IT* estimation, IT* weights_sum, Vec weight, Vec p) + static inline void f(IT* estimation, IT* weights_sum, Vec weight, Vec p) { estimation[0] += (IT)weight[0] * p[0]; estimation[1] += (IT)weight[1] * p[1]; 
@@ -383,9 +365,9 @@ template struct incWithWeight_, IT, Vec struct incWithWeight_, IT, Vec > +template struct incWithWeight_, IT, Vec > { - static inline void f(IT* estimation, IT* weights_sum, Vec weight, Vec p) + static inline void f(IT* estimation, IT* weights_sum, Vec weight, Vec p) { estimation[0] += (IT)weight[0] * p[0]; estimation[1] += (IT)weight[1] * p[1]; @@ -404,6 +386,43 @@ static inline void incWithWeight(IT* estimation, IT* weights_sum, IT weight, T p return incWithWeight_::f(estimation, weights_sum, weight, p); } +template struct divByWeightsSum_ +{ + static inline void f(IT* estimation, IT* weights_sum); +}; + +template struct divByWeightsSum_ +{ + static inline void f(IT* estimation, IT* weights_sum) + { + estimation[0] = (static_cast(estimation[0]) + weights_sum[0]/2) / weights_sum[0]; + } +}; + +template struct divByWeightsSum_ +{ + static inline void f(IT* estimation, IT* weights_sum) + { + for (size_t i = 0; i < n; i++) + estimation[i] = (static_cast(estimation[i]) + weights_sum[0]/2) / weights_sum[0]; + } +}; + +template struct divByWeightsSum_ +{ + static inline void f(IT* estimation, IT* weights_sum) + { + for (size_t i = 0; i < n; i++) + estimation[i] = (static_cast(estimation[i]) + weights_sum[i]/2) / weights_sum[i]; + } +}; + +template +static inline void divByWeightsSum(IT* estimation, IT* weights_sum) +{ + return divByWeightsSum_::f(estimation, weights_sum); +} + template struct saturateCastFromArray_ { static inline T f(IT* estimation) diff --git a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp index 489ee673f..cd3833a56 100644 --- a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp @@ -81,7 +81,7 @@ private: int search_window_half_size_; int temporal_window_half_size_; - int fixed_point_mult_; + typename pixelInfo::sampleType fixed_point_mult_; int almost_template_window_size_sq_bin_shift; 
std::vector almost_dist2weight; @@ -128,7 +128,7 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingIn const IT max_estimate_sum_value = (IT)temporal_window_size_ * (IT)search_window_size_ * (IT)search_window_size_ * (IT)pixelInfo::sampleMax(); fixed_point_mult_ = (int)std::min(std::numeric_limits::max() / max_estimate_sum_value, - std::numeric_limits::max()); + pixelInfo::sampleMax()); // precalc weight for every possible l2 dist between blocks // additional optimization of precalced weights to replace division(averaging) by binary shift @@ -243,9 +243,11 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Rang } // calc weights - IT estimation[pixelInfo::channels], weights_sum[pixelInfo::channels]; + IT estimation[pixelInfo::channels], weights_sum[pixelInfo::channels]; for (size_t channel_num = 0; channel_num < pixelInfo::channels; channel_num++) - estimation[channel_num] = weights_sum[channel_num] = 0; + estimation[channel_num] = 0; + for (size_t channel_num = 0; channel_num < pixelInfo::channels; channel_num++) + weights_sum[channel_num] = 0; for (int d = 0; d < temporal_window_size_; d++) { @@ -267,11 +269,8 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Rang } } - for (size_t channel_num = 0; channel_num < pixelInfo::channels; channel_num++) - estimation[channel_num] = - (static_cast(estimation[channel_num]) + weights_sum[channel_num] / 2) / - weights_sum[channel_num]; - + divByWeightsSum::channels, pixelInfo::channels>(estimation, + weights_sum); dst_.at(i,j) = saturateCastFromArray(estimation); } } From 41ffcc27dd0887ee5942a9d48761f6958df0f318 Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Fri, 6 Mar 2015 15:06:11 +0100 Subject: [PATCH 26/40] Added support for h = 0.0 --- ...fast_nlmeans_denoising_invoker_commons.hpp | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp 
b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp index df8e4703e..efd482f6b 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp @@ -126,11 +126,13 @@ class DistAbs { static inline WT f(double dist, const float *h, WT fixed_point_mult) { + double w = std::exp(-dist*dist / (h[0]*h[0] * pixelInfo::channels)); + if (std::isnan(w)) w = 1.0; // Handle h = 0.0 + static const double WEIGHT_THRESHOLD = 0.001; - WT weight = (WT)round(fixed_point_mult * - std::exp(-dist*dist / (h[0]*h[0] * pixelInfo::channels))); - if (weight < WEIGHT_THRESHOLD * fixed_point_mult) - weight = 0; + WT weight = (WT)round(fixed_point_mult * w); + if (weight < WEIGHT_THRESHOLD * fixed_point_mult) weight = 0; + return weight; } }; @@ -167,7 +169,8 @@ public: }; template - static inline WT calcWeight(double dist, const float *h, int fixed_point_mult) + static inline WT calcWeight(double dist, const float *h, + typename pixelInfo::sampleType fixed_point_mult) { return calcWeight_::f(dist, h, fixed_point_mult); } @@ -243,20 +246,22 @@ class DistSquared template struct calcWeight_ { - static inline WT f(double dist, const float *h, int fixed_point_mult) + static inline WT f(double dist, const float *h, WT fixed_point_mult) { + double w = std::exp(-dist / (h[0]*h[0] * pixelInfo::channels)); + if (std::isnan(w)) w = 1.0; // Handle h = 0.0 + static const double WEIGHT_THRESHOLD = 0.001; - WT weight = (WT)round(fixed_point_mult * - std::exp(-dist / (h[0]*h[0] * pixelInfo::channels))); - if (weight < WEIGHT_THRESHOLD * fixed_point_mult) - weight = 0; + WT weight = (WT)round(fixed_point_mult * w); + if (weight < WEIGHT_THRESHOLD * fixed_point_mult) weight = 0; + return weight; } }; template struct calcWeight_ > { - static inline Vec f(double dist, const float *h, int fixed_point_mult) + static inline Vec f(double dist, const float *h, ET fixed_point_mult) { Vec res; for (int i=0; i - static inline WT 
calcWeight(double dist, const float *h, int fixed_point_mult) + static inline WT calcWeight(double dist, const float *h, + typename pixelInfo::sampleType fixed_point_mult) { return calcWeight_::f(dist, h, fixed_point_mult); } From 324fa26848f1020d125bd45e1fa5459c07fb092a Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Fri, 6 Mar 2015 19:07:13 +0100 Subject: [PATCH 27/40] Refactoring of OpenCL implementation --- modules/photo/src/denoising.cpp | 6 ++- .../src/fast_nlmeans_denoising_opencl.hpp | 38 ++++++++++++------- modules/photo/src/opencl/nlmeans.cl | 31 ++++++++------- 3 files changed, 44 insertions(+), 31 deletions(-) diff --git a/modules/photo/src/denoising.cpp b/modules/photo/src/denoising.cpp index 29899f791..30f638d4c 100644 --- a/modules/photo/src/denoising.cpp +++ b/modules/photo/src/denoising.cpp @@ -51,7 +51,8 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h, Size src_size = _src.size(); CV_OCL_RUN(_src.dims() <= 2 && (_src.isUMat() || _dst.isUMat()) && src_size.width > 5 && src_size.height > 5, // low accuracy on small sizes - ocl_fastNlMeansDenoising(_src, _dst, h, templateWindowSize, searchWindowSize, false)) + ocl_fastNlMeansDenoising(_src, _dst, &h, 1, + templateWindowSize, searchWindowSize, false)) Mat src = _src.getMat(); _dst.create(src_size, src.type()); @@ -95,7 +96,8 @@ void cv::fastNlMeansDenoisingAbs( InputArray _src, OutputArray _dst, float h, Size src_size = _src.size(); CV_OCL_RUN(_src.dims() <= 2 && (_src.isUMat() || _dst.isUMat()) && src_size.width > 5 && src_size.height > 5, // low accuracy on small sizes - ocl_fastNlMeansDenoising(_src, _dst, h, templateWindowSize, searchWindowSize, true)) + ocl_fastNlMeansDenoising(_src, _dst, &h, 1, + templateWindowSize, searchWindowSize, true)) Mat src = _src.getMat(); _dst.create(src_size, src.type()); diff --git a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp index 2fa11a351..a06dc6192 100644 --- 
a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp @@ -29,7 +29,7 @@ static int divUp(int a, int b) } template -static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindowSize, int templateWindowSize, FT h, int cn, +static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindowSize, int templateWindowSize, FT *h, int hn, int cn, int & almostTemplateWindowSizeSqBinShift, bool abs) { const WT maxEstimateSumValue = searchWindowSize * searchWindowSize * @@ -53,24 +53,32 @@ static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindow int maxDist = abs ? std::numeric_limits::max() * cn : std::numeric_limits::max() * std::numeric_limits::max() * cn; int almostMaxDist = (int)(maxDist / almostDist2ActualDistMultiplier + 1); - FT den = 1.0f / (h * h * cn); + FT den[4]; + CV_Assert(hn > 0 && hn <= 4); + for (int i=0; i 1 ? format("%d", hn).c_str() : "").c_str(), + depth == CV_8U ? ocl::convertTypeStr(CV_32S, CV_32S, hn, buf[0]) : + format("convert_long%s", hn > 1 ? format("%d", hn).c_str() : "").c_str(), depth == CV_8U ? ocl::typeToStr(CV_32SC(cn)) : - (sprintf(buf[0], "long%d", cn), buf[0]), + format("long%s", cn > 1 ? format("%d", cn).c_str() : "").c_str(), depth == CV_8U ? ocl::convertTypeStr(depth, CV_32S, cn, buf[1]) : - (sprintf(buf[1], "convert_long%d", cn), buf[1]), + format("convert_long%s", cn > 1 ? 
format("%d", cn).c_str() : "").c_str(), BLOCK_COLS, BLOCK_ROWS, ctaSize, templateWindowHalfWize, searchWindowHalfSize, ocl::convertTypeStr(depth, CV_32S, cn, buf[2]), cn, @@ -115,13 +127,13 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, if ((depth == CV_8U && !ocl_calcAlmostDist2Weight(almostDist2Weight, searchWindowSize, templateWindowSize, - h, cn, + h, hn, cn, almostTemplateWindowSizeSqBinShift, abs)) || (depth == CV_16U && !ocl_calcAlmostDist2Weight(almostDist2Weight, searchWindowSize, templateWindowSize, - h, cn, + h, hn, cn, almostTemplateWindowSizeSqBinShift, abs))) return false; diff --git a/modules/photo/src/opencl/nlmeans.cl b/modules/photo/src/opencl/nlmeans.cl index 11837a5fc..936aed6fa 100644 --- a/modules/photo/src/opencl/nlmeans.cl +++ b/modules/photo/src/opencl/nlmeans.cl @@ -20,9 +20,9 @@ #ifdef OP_CALC_WEIGHTS -__kernel void calcAlmostDist2Weight(__global int * almostDist2Weight, int almostMaxDist, +__kernel void calcAlmostDist2Weight(__global wlut_t * almostDist2Weight, int almostMaxDist, FT almostDist2ActualDistMultiplier, int fixedPointMult, - FT den, FT WEIGHT_THRESHOLD) + w_t den, FT WEIGHT_THRESHOLD) { int almostDist = get_global_id(0); @@ -30,14 +30,13 @@ __kernel void calcAlmostDist2Weight(__global int * almostDist2Weight, int almost { FT dist = almostDist * almostDist2ActualDistMultiplier; #ifdef ABS - int weight = convert_int_sat_rte(fixedPointMult * exp(-dist*dist * den)); + w_t w = exp((w_t)(-dist*dist) * den); #else - int weight = convert_int_sat_rte(fixedPointMult * exp(-dist * den)); + w_t w = exp((w_t)(-dist) * den); #endif - if (weight < WEIGHT_THRESHOLD * fixedPointMult) - weight = 0; - - almostDist2Weight[almostDist] = weight; + wlut_t weight = convert_wlut_t(fixedPointMult * (isnan(w) ? (w_t)1.0 : w)); + almostDist2Weight[almostDist] = + weight < WEIGHT_THRESHOLD * fixedPointMult ? 
(wlut_t)0 : weight; } } @@ -208,14 +207,14 @@ inline void calcElement(__global const uchar * src, int src_step, int src_offset } inline void convolveWindow(__global const uchar * src, int src_step, int src_offset, - __local int * dists, __global const int * almostDist2Weight, + __local int * dists, __global const wlut_t * almostDist2Weight, __global uchar * dst, int dst_step, int dst_offset, int y, int x, int id, __local weight_t * weights_local, __local sum_t * weighted_sum_local, int almostTemplateWindowSizeSqBinShift) { int sx = x - SEARCH_SIZE2, sy = y - SEARCH_SIZE2; - weight_t weights = 0; - sum_t weighted_sum = (sum_t)(0); + weight_t weights = (weight_t)0; + sum_t weighted_sum = (sum_t)0; for (int i = id; i < SEARCH_SIZE_SQ; i += CTA_SIZE) { @@ -223,10 +222,10 @@ inline void convolveWindow(__global const uchar * src, int src_step, int src_off sum_t src_value = convert_sum_t(*(__global const pixel_t *)(src + src_index)); int almostAvgDist = dists[i] >> almostTemplateWindowSizeSqBinShift; - int weight = almostDist2Weight[almostAvgDist]; + weight_t weight = convert_weight_t(almostDist2Weight[almostAvgDist]); - weights += (weight_t)weight; - weighted_sum += (sum_t)(weight) * src_value; + weights += weight; + weighted_sum += (sum_t)weight * src_value; } weights_local[id] = weights; @@ -251,13 +250,13 @@ inline void convolveWindow(__global const uchar * src, int src_step, int src_off weighted_sum_local[2] + weighted_sum_local[3]; weight_t weights_local_0 = weights_local[0] + weights_local[1] + weights_local[2] + weights_local[3]; - *(__global pixel_t *)(dst + dst_index) = convert_pixel_t(weighted_sum_local_0 / (sum_t)(weights_local_0)); + *(__global pixel_t *)(dst + dst_index) = convert_pixel_t(weighted_sum_local_0 / (sum_t)weights_local_0); } } __kernel void fastNlMeansDenoising(__global const uchar * src, int src_step, int src_offset, __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, - __global const int * almostDist2Weight, 
__global uchar * buffer, + __global const wlut_t * almostDist2Weight, __global uchar * buffer, int almostTemplateWindowSizeSqBinShift) { int block_x = get_group_id(0), nblocks_x = get_num_groups(0); From cc8d94c6fc977d116beb81c6a50f123790d01bef Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Fri, 6 Mar 2015 20:43:55 +0100 Subject: [PATCH 28/40] Addition of per-channel h-values for fastNlMeansDenoising[Multi][Abs] --- modules/photo/include/opencv2/photo.hpp | 126 +++++++++- modules/photo/src/denoising.cpp | 221 ++++++++++++++++++ .../src/fast_nlmeans_denoising_invoker.hpp | 2 +- ...fast_nlmeans_denoising_invoker_commons.hpp | 2 +- .../fast_nlmeans_multi_denoising_invoker.hpp | 2 +- modules/photo/src/opencl/nlmeans.cl | 2 +- 6 files changed, 344 insertions(+), 11 deletions(-) diff --git a/modules/photo/include/opencv2/photo.hpp b/modules/photo/include/opencv2/photo.hpp index 446e81750..5e11333ee 100644 --- a/modules/photo/include/opencv2/photo.hpp +++ b/modules/photo/include/opencv2/photo.hpp @@ -138,6 +138,31 @@ parameter. CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst, float h = 3, int templateWindowSize = 7, int searchWindowSize = 21); +/** @brief Perform image denoising using Non-local Means Denoising algorithm + with several computational +optimizations. Noise expected to be a gaussian white noise + +@param src Input 8-bit 1-channel, 2-channel, 3-channel or 4-channel image. +@param dst Output image with the same size and type as src . +@param templateWindowSize Size in pixels of the template patch that is used to compute weights. +Should be odd. Recommended value 7 pixels +@param searchWindowSize Size in pixels of the window that is used to compute weighted average for +given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater +denoising time. Recommended value 21 pixels +@param h Array of parameters regulating filter strength, one per +channel. 
Big h value perfectly removes noise but also removes image +details, smaller h value preserves details but also preserves some +noise + +This function expected to be applied to grayscale images. For colored images look at +fastNlMeansDenoisingColored. Advanced usage of this functions can be manual denoising of colored +image in different colorspaces. Such approach is used in fastNlMeansDenoisingColored by converting +image to CIELAB colorspace and then separately denoise L and AB components with different h +parameter. + */ +CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst, float *h, + int templateWindowSize = 7, int searchWindowSize = 21); + /** @brief Perform image denoising using Non-local Means Denoising algorithm with several computational optimizations. Noise expected to be a @@ -163,6 +188,33 @@ parameter. CV_EXPORTS_W void fastNlMeansDenoisingAbs( InputArray src, OutputArray dst, float h = 3, int templateWindowSize = 7, int searchWindowSize = 21); +/** @brief Perform image denoising using Non-local Means Denoising +algorithm +with several computational optimizations. Noise expected to be a +gaussian white noise. Uses squared sum of absolute value distances +instead of sum of squared distances for weight calculation + +@param src Input 8-bit or 16-bit 1-channel, 2-channel, 3-channel or 4-channel image. +@param dst Output image with the same size and type as src . +@param templateWindowSize Size in pixels of the template patch that is used to compute weights. +Should be odd. Recommended value 7 pixels +@param searchWindowSize Size in pixels of the window that is used to compute weighted average for +given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater +denoising time. Recommended value 21 pixels +@param h Array of parameters regulating filter strength, one per +channel. 
Big h value perfectly removes noise but also removes image +details, smaller h value preserves details but also preserves some +noise + +This function expected to be applied to grayscale images. For colored images look at +fastNlMeansDenoisingColored. Advanced usage of this functions can be manual denoising of colored +image in different colorspaces. Such approach is used in fastNlMeansDenoisingColored by converting +image to CIELAB colorspace and then separately denoise L and AB components with different h +parameter. + */ +CV_EXPORTS_W void fastNlMeansDenoisingAbs( InputArray src, OutputArray dst, float *h, + int templateWindowSize = 7, int searchWindowSize = 21); + /** @brief Modification of fastNlMeansDenoising function for colored images @param src Input 8-bit 3-channel image. @@ -204,14 +256,73 @@ Should be odd. Recommended value 7 pixels @param searchWindowSize Size in pixels of the window that is used to compute weighted average for given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater denoising time. Recommended value 21 pixels -@param h Parameter regulating filter strength for luminance component. Bigger h value perfectly -removes noise but also removes image details, smaller h value preserves details but also preserves -some noise +@param h Parameter regulating filter strength. Bigger h value +perfectly removes noise but also removes image details, smaller h +value preserves details but also preserves some noise */ CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst, int imgToDenoiseIndex, int temporalWindowSize, float h = 3, int templateWindowSize = 7, int searchWindowSize = 21); +/** @brief Modification of fastNlMeansDenoising function for images sequence where consequtive images have been +captured in small period of time. For example video. This version of the function is for grayscale +images or for manual manipulation with colorspaces. 
For more details see + + +@param srcImgs Input 8-bit 1-channel, 2-channel, 3-channel or +4-channel images sequence. All images should have the same type and +size. +@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence +@param temporalWindowSize Number of surrounding images to use for target image denoising. Should +be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to +imgToDenoiseIndex - temporalWindowSize / 2 from srcImgs will be used to denoise +srcImgs[imgToDenoiseIndex] image. +@param dst Output image with the same size and type as srcImgs images. +@param templateWindowSize Size in pixels of the template patch that is used to compute weights. +Should be odd. Recommended value 7 pixels +@param searchWindowSize Size in pixels of the window that is used to compute weighted average for +given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater +denoising time. Recommended value 21 pixels +@param h Array of parameters regulating filter strength, one for each +channel. Bigger h value perfectly removes noise but also removes image +details, smaller h value preserves details but also preserves some +noise + */ +CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst, + int imgToDenoiseIndex, int temporalWindowSize, + float *h , int templateWindowSize = 7, int searchWindowSize = 21); + +/** @brief Modification of fastNlMeansDenoising function for images +sequence where consequtive images have been captured in small period +of time. For example video. This version of the function is for +grayscale images or for manual manipulation with colorspaces. For more +details see +. Uses +squared sum of absolute value distances instead of sum of squared +distances for weight calculation + +@param srcImgs Input 8-bit or 16-bit 1-channel, 2-channel, 3-channel +or 4-channel images sequence. All images should have the same type and +size. 
+@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence +@param temporalWindowSize Number of surrounding images to use for target image denoising. Should +be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to +imgToDenoiseIndex - temporalWindowSize / 2 from srcImgs will be used to denoise +srcImgs[imgToDenoiseIndex] image. +@param dst Output image with the same size and type as srcImgs images. +@param templateWindowSize Size in pixels of the template patch that is used to compute weights. +Should be odd. Recommended value 7 pixels +@param searchWindowSize Size in pixels of the window that is used to compute weighted average for +given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater +denoising time. Recommended value 21 pixels +@param h Parameter regulating filter strength. Bigger h value +perfectly removes noise but also removes image details, smaller h +value preserves details but also preserves some noise + */ +CV_EXPORTS_W void fastNlMeansDenoisingMultiAbs( InputArrayOfArrays srcImgs, OutputArray dst, + int imgToDenoiseIndex, int temporalWindowSize, + float h = 3, int templateWindowSize = 7, int searchWindowSize = 21); + /** @brief Modification of fastNlMeansDenoising function for images sequence where consequtive images have been captured in small period of time. For example video. This version of the function is for @@ -235,13 +346,14 @@ Should be odd. Recommended value 7 pixels @param searchWindowSize Size in pixels of the window that is used to compute weighted average for given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater denoising time. Recommended value 21 pixels -@param h Parameter regulating filter strength for luminance component. 
Bigger h value perfectly -removes noise but also removes image details, smaller h value preserves details but also preserves -some noise +@param h Array of parameters regulating filter strength, one for each +channel. Bigger h value perfectly removes noise but also removes image +details, smaller h value preserves details but also preserves some +noise */ CV_EXPORTS_W void fastNlMeansDenoisingMultiAbs( InputArrayOfArrays srcImgs, OutputArray dst, int imgToDenoiseIndex, int temporalWindowSize, - float h = 3, int templateWindowSize = 7, int searchWindowSize = 21); + float *h, int templateWindowSize = 7, int searchWindowSize = 21); /** @brief Modification of fastNlMeansDenoisingMulti function for colored images sequences diff --git a/modules/photo/src/denoising.cpp b/modules/photo/src/denoising.cpp index 30f638d4c..9f63254b0 100644 --- a/modules/photo/src/denoising.cpp +++ b/modules/photo/src/denoising.cpp @@ -90,6 +90,51 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h, } } +void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float *h, + int templateWindowSize, int searchWindowSize) +{ + Size src_size = _src.size(); + CV_OCL_RUN(_src.dims() <= 2 && (_src.isUMat() || _dst.isUMat()) && + src_size.width > 5 && src_size.height > 5, // low accuracy on small sizes + ocl_fastNlMeansDenoising(_src, _dst, h, CV_MAT_CN(_src.type()), + templateWindowSize, searchWindowSize, false)) + + Mat src = _src.getMat(); + _dst.create(src_size, src.type()); + Mat dst = _dst.getMat(); + +#ifdef HAVE_TEGRA_OPTIMIZATION + if(tegra::fastNlMeansDenoising(src, dst, h, templateWindowSize, searchWindowSize)) + return; +#endif + + switch (src.type()) { + case CV_8U: + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_8UC2: + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, h)); + break; + case 
CV_8UC3: + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_8UC4: + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, h)); + break; + default: + CV_Error(Error::StsBadArg, + "Unsupported image format! Only CV_8U, CV_8UC2, CV_8UC3 and CV_8UC4 are supported"); + } +} + void cv::fastNlMeansDenoisingAbs( InputArray _src, OutputArray _dst, float h, int templateWindowSize, int searchWindowSize) { @@ -150,6 +195,66 @@ void cv::fastNlMeansDenoisingAbs( InputArray _src, OutputArray _dst, float h, } } +void cv::fastNlMeansDenoisingAbs( InputArray _src, OutputArray _dst, float *h, + int templateWindowSize, int searchWindowSize) +{ + Size src_size = _src.size(); + CV_OCL_RUN(_src.dims() <= 2 && (_src.isUMat() || _dst.isUMat()) && + src_size.width > 5 && src_size.height > 5, // low accuracy on small sizes + ocl_fastNlMeansDenoising(_src, _dst, h, CV_MAT_CN(_src.type()), + templateWindowSize, searchWindowSize, true)) + + Mat src = _src.getMat(); + _dst.create(src_size, src.type()); + Mat dst = _dst.getMat(); + + switch (src.type()) { + case CV_8U: + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_8UC2: + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_8UC3: + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_8UC4: + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_16U: + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_16UC2: + parallel_for_(cv::Range(0, 
src.rows), + FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, Vec2i>( + src, dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_16UC3: + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, Vec3i>( + src, dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_16UC4: + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, Vec4i>( + src, dst, templateWindowSize, searchWindowSize, h)); + break; + default: + CV_Error(Error::StsBadArg, + "Unsupported image format! Only CV_8U, CV_8UC2, CV_8UC3, CV_8UC4, CV_16U, CV_16UC2, CV_16UC3 and CV_16UC4 are supported"); + } +} + void cv::fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst, float h, float hForColorComponents, int templateWindowSize, int searchWindowSize) @@ -269,6 +374,52 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds } } +void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _dst, + int imgToDenoiseIndex, int temporalWindowSize, + float *h, int templateWindowSize, int searchWindowSize) +{ + std::vector srcImgs; + _srcImgs.getMatVector(srcImgs); + + fastNlMeansDenoisingMultiCheckPreconditions( + srcImgs, imgToDenoiseIndex, + temporalWindowSize, templateWindowSize, searchWindowSize); + + _dst.create(srcImgs[0].size(), srcImgs[0].type()); + Mat dst = _dst.getMat(); + + switch (srcImgs[0].type()) + { + case CV_8U: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_8UC2: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_8UC3: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, 
temporalWindowSize, + dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_8UC4: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, h)); + break; + default: + CV_Error(Error::StsBadArg, + "Unsupported image format! Only CV_8U, CV_8UC2, CV_8UC3 and CV_8UC4 are supported"); + } +} + void cv::fastNlMeansDenoisingMultiAbs( InputArrayOfArrays _srcImgs, OutputArray _dst, int imgToDenoiseIndex, int temporalWindowSize, float h, int templateWindowSize, int searchWindowSize) @@ -339,6 +490,76 @@ void cv::fastNlMeansDenoisingMultiAbs( InputArrayOfArrays _srcImgs, OutputArray } } +void cv::fastNlMeansDenoisingMultiAbs( InputArrayOfArrays _srcImgs, OutputArray _dst, + int imgToDenoiseIndex, int temporalWindowSize, + float *h, int templateWindowSize, int searchWindowSize) +{ + std::vector srcImgs; + _srcImgs.getMatVector(srcImgs); + + fastNlMeansDenoisingMultiCheckPreconditions( + srcImgs, imgToDenoiseIndex, + temporalWindowSize, templateWindowSize, searchWindowSize); + + _dst.create(srcImgs[0].size(), srcImgs[0].type()); + Mat dst = _dst.getMat(); + + switch (srcImgs[0].type()) + { + case CV_8U: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_8UC2: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_8UC3: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_8UC4: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + 
dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_16U: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_16UC2: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, Vec2i>( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_16UC3: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, Vec3i>( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_16UC4: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, Vec4i>( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, h)); + break; + default: + CV_Error(Error::StsBadArg, + "Unsupported image format! 
Only CV_8U, CV_8UC2, CV_8UC3, CV_8UC4, CV_16U, CV_16UC2, CV_16UC3 and CV_16UC4 are supported"); + } +} + void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputArray _dst, int imgToDenoiseIndex, int temporalWindowSize, float h, float hForColorComponents, diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp index ff35550df..6e74acf03 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp @@ -236,7 +236,7 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& ra for (int x = 0; x < search_window_size_; x++) { int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_; - int weight = almost_dist2weight_[almostAvgDist]; + WT weight = almost_dist2weight_[almostAvgDist]; T p = cur_row_ptr[border_size_ + search_window_x + x]; incWithWeight(estimation, weights_sum, weight, p); } diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp index efd482f6b..9833ea7d3 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp @@ -387,7 +387,7 @@ template struct incWithWeight_ -static inline void incWithWeight(IT* estimation, IT* weights_sum, IT weight, T p) +static inline void incWithWeight(IT* estimation, IT* weights_sum, WT weight, T p) { return incWithWeight_::f(estimation, weights_sum, weight, p); } diff --git a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp index cd3833a56..3f13f400d 100644 --- a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp @@ -262,7 +262,7 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Rang { int almostAvgDist = dist_sums_row[x] >> 
almost_template_window_size_sq_bin_shift; - int weight = almost_dist2weight[almostAvgDist]; + WT weight = almost_dist2weight[almostAvgDist]; T p = cur_row_ptr[border_size_ + search_window_x + x]; incWithWeight(estimation, weights_sum, weight, p); } diff --git a/modules/photo/src/opencl/nlmeans.cl b/modules/photo/src/opencl/nlmeans.cl index 936aed6fa..879665f48 100644 --- a/modules/photo/src/opencl/nlmeans.cl +++ b/modules/photo/src/opencl/nlmeans.cl @@ -36,7 +36,7 @@ __kernel void calcAlmostDist2Weight(__global wlut_t * almostDist2Weight, int alm #endif wlut_t weight = convert_wlut_t(fixedPointMult * (isnan(w) ? (w_t)1.0 : w)); almostDist2Weight[almostDist] = - weight < WEIGHT_THRESHOLD * fixedPointMult ? (wlut_t)0 : weight; + weight < (wlut_t)(WEIGHT_THRESHOLD * fixedPointMult) ? (wlut_t)0 : weight; } } From 21160137d4a8eaae0be2c2545ab7e18cd3bfc7a3 Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Fri, 6 Mar 2015 20:44:31 +0100 Subject: [PATCH 29/40] Addition of test cases --- modules/photo/test/ocl/test_denoising.cpp | 50 +++++++++++++++++++---- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/modules/photo/test/ocl/test_denoising.cpp b/modules/photo/test/ocl/test_denoising.cpp index 4aba4b51e..3b6998f06 100644 --- a/modules/photo/test/ocl/test_denoising.cpp +++ b/modules/photo/test/ocl/test_denoising.cpp @@ -16,7 +16,7 @@ namespace ocl { PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool, bool) { int cn, templateWindowSize, searchWindowSize; - float h; + float h[4]; bool use_roi, use_image; TEST_DECLARE_INPUT_PARAMETER(src); @@ -30,7 +30,10 @@ PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool, bool) templateWindowSize = 7; searchWindowSize = 21; - h = 3.0f; + + ASSERT_TRUE(cn > 0 && cn <= 4); + for (int i=0; i Date: Mon, 9 Mar 2015 15:52:16 +0100 Subject: [PATCH 30/40] Changed parameters of fastNlMeansDenoising[Multi][Abs] from float * to std::vector --- modules/photo/include/opencv2/photo.hpp | 40 +- 
modules/photo/src/denoising.cpp | 482 +++++++++------------- modules/photo/test/ocl/test_denoising.cpp | 5 +- 3 files changed, 225 insertions(+), 302 deletions(-) diff --git a/modules/photo/include/opencv2/photo.hpp b/modules/photo/include/opencv2/photo.hpp index 5e11333ee..d613c2420 100644 --- a/modules/photo/include/opencv2/photo.hpp +++ b/modules/photo/include/opencv2/photo.hpp @@ -149,10 +149,10 @@ Should be odd. Recommended value 7 pixels @param searchWindowSize Size in pixels of the window that is used to compute weighted average for given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater denoising time. Recommended value 21 pixels -@param h Array of parameters regulating filter strength, one per -channel. Big h value perfectly removes noise but also removes image -details, smaller h value preserves details but also preserves some -noise +@param h Array of parameters regulating filter strength, either one +parameter applied to all channels or one per channel in src. Big h value +perfectly removes noise but also removes image details, smaller h +value preserves details but also preserves some noise This function expected to be applied to grayscale images. For colored images look at fastNlMeansDenoisingColored. Advanced usage of this functions can be manual denoising of colored @@ -160,7 +160,7 @@ image in different colorspaces. Such approach is used in fastNlMeansDenoisingCol image to CIELAB colorspace and then separately denoise L and AB components with different h parameter. */ -CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst, float *h, +CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst, std::vector h, int templateWindowSize = 7, int searchWindowSize = 21); /** @brief Perform image denoising using Non-local Means Denoising @@ -201,10 +201,10 @@ Should be odd. 
Recommended value 7 pixels @param searchWindowSize Size in pixels of the window that is used to compute weighted average for given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater denoising time. Recommended value 21 pixels -@param h Array of parameters regulating filter strength, one per -channel. Big h value perfectly removes noise but also removes image -details, smaller h value preserves details but also preserves some -noise +@param h Array of parameters regulating filter strength, either one +parameter applied to all channels or one per channel in src. Big h value +perfectly removes noise but also removes image details, smaller h +value preserves details but also preserves some noise This function expected to be applied to grayscale images. For colored images look at fastNlMeansDenoisingColored. Advanced usage of this functions can be manual denoising of colored @@ -212,7 +212,7 @@ image in different colorspaces. Such approach is used in fastNlMeansDenoisingCol image to CIELAB colorspace and then separately denoise L and AB components with different h parameter. */ -CV_EXPORTS_W void fastNlMeansDenoisingAbs( InputArray src, OutputArray dst, float *h, +CV_EXPORTS_W void fastNlMeansDenoisingAbs( InputArray src, OutputArray dst, std::vector h, int templateWindowSize = 7, int searchWindowSize = 21); /** @brief Modification of fastNlMeansDenoising function for colored images @@ -283,14 +283,14 @@ Should be odd. Recommended value 7 pixels @param searchWindowSize Size in pixels of the window that is used to compute weighted average for given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater denoising time. Recommended value 21 pixels -@param h Array of parameters regulating filter strength, one for each -channel. 
Bigger h value perfectly removes noise but also removes image -details, smaller h value preserves details but also preserves some -noise +@param h Array of parameters regulating filter strength, either one +parameter applied to all channels or one per channel in src. Big h value +perfectly removes noise but also removes image details, smaller h +value preserves details but also preserves some noise */ CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst, int imgToDenoiseIndex, int temporalWindowSize, - float *h , int templateWindowSize = 7, int searchWindowSize = 21); + std::vector h , int templateWindowSize = 7, int searchWindowSize = 21); /** @brief Modification of fastNlMeansDenoising function for images sequence where consequtive images have been captured in small period @@ -346,14 +346,14 @@ Should be odd. Recommended value 7 pixels @param searchWindowSize Size in pixels of the window that is used to compute weighted average for given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater denoising time. Recommended value 21 pixels -@param h Array of parameters regulating filter strength, one for each -channel. Bigger h value perfectly removes noise but also removes image -details, smaller h value preserves details but also preserves some -noise +@param h Array of parameters regulating filter strength, either one +parameter applied to all channels or one per channel in src. 
Big h value +perfectly removes noise but also removes image details, smaller h +value preserves details but also preserves some noise */ CV_EXPORTS_W void fastNlMeansDenoisingMultiAbs( InputArrayOfArrays srcImgs, OutputArray dst, int imgToDenoiseIndex, int temporalWindowSize, - float *h, int templateWindowSize = 7, int searchWindowSize = 21); + std::vector h, int templateWindowSize = 7, int searchWindowSize = 21); /** @brief Modification of fastNlMeansDenoisingMulti function for colored images sequences diff --git a/modules/photo/src/denoising.cpp b/modules/photo/src/denoising.cpp index 9f63254b0..7251b6446 100644 --- a/modules/photo/src/denoising.cpp +++ b/modules/photo/src/denoising.cpp @@ -48,55 +48,20 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h, int templateWindowSize, int searchWindowSize) { - Size src_size = _src.size(); - CV_OCL_RUN(_src.dims() <= 2 && (_src.isUMat() || _dst.isUMat()) && - src_size.width > 5 && src_size.height > 5, // low accuracy on small sizes - ocl_fastNlMeansDenoising(_src, _dst, &h, 1, - templateWindowSize, searchWindowSize, false)) - - Mat src = _src.getMat(); - _dst.create(src_size, src.type()); - Mat dst = _dst.getMat(); - -#ifdef HAVE_TEGRA_OPTIMIZATION - if(tegra::fastNlMeansDenoising(src, dst, h, templateWindowSize, searchWindowSize)) - return; -#endif - - switch (src.type()) { - case CV_8U: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, &h)); - break; - case CV_8UC2: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, &h)); - break; - case CV_8UC3: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, &h)); - break; - case CV_8UC4: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, &h)); - break; - default: - 
CV_Error(Error::StsBadArg, - "Unsupported image format! Only CV_8U, CV_8UC2, CV_8UC3 and CV_8UC4 are supported"); - } + fastNlMeansDenoising(_src, _dst, std::vector(1, h), + templateWindowSize, searchWindowSize); } -void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float *h, +void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, std::vector h, int templateWindowSize, int searchWindowSize) { + int hn = h.size(); + CV_Assert(hn == 1 || hn == CV_MAT_CN(_src.type())); + Size src_size = _src.size(); CV_OCL_RUN(_src.dims() <= 2 && (_src.isUMat() || _dst.isUMat()) && src_size.width > 5 && src_size.height > 5, // low accuracy on small sizes - ocl_fastNlMeansDenoising(_src, _dst, h, CV_MAT_CN(_src.type()), + ocl_fastNlMeansDenoising(_src, _dst, &h[0], hn, templateWindowSize, searchWindowSize, false)) Mat src = _src.getMat(); @@ -111,23 +76,38 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float *h, switch (src.type()) { case CV_8U: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, h)); + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_8UC2: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, h)); + if (hn == 1) + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_8UC3: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, h)); + if (hn == 1) + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( 
+ src, dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_8UC4: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, h)); + if (hn == 1) + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, &h[0])); break; default: CV_Error(Error::StsBadArg, @@ -138,70 +118,20 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float *h, void cv::fastNlMeansDenoisingAbs( InputArray _src, OutputArray _dst, float h, int templateWindowSize, int searchWindowSize) { - Size src_size = _src.size(); - CV_OCL_RUN(_src.dims() <= 2 && (_src.isUMat() || _dst.isUMat()) && - src_size.width > 5 && src_size.height > 5, // low accuracy on small sizes - ocl_fastNlMeansDenoising(_src, _dst, &h, 1, - templateWindowSize, searchWindowSize, true)) - - Mat src = _src.getMat(); - _dst.create(src_size, src.type()); - Mat dst = _dst.getMat(); - - switch (src.type()) { - case CV_8U: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, &h)); - break; - case CV_8UC2: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, &h)); - break; - case CV_8UC3: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, &h)); - break; - case CV_8UC4: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, &h)); - break; - case CV_16U: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, &h)); - break; - case CV_16UC2: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker, int64, 
uint64, DistAbs, int>( - src, dst, templateWindowSize, searchWindowSize, &h)); - break; - case CV_16UC3: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, int>( - src, dst, templateWindowSize, searchWindowSize, &h)); - break; - case CV_16UC4: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, int>( - src, dst, templateWindowSize, searchWindowSize, &h)); - break; - default: - CV_Error(Error::StsBadArg, - "Unsupported image format! Only CV_8U, CV_8UC2, CV_8UC3, CV_8UC4, CV_16U, CV_16UC2, CV_16UC3 and CV_16UC4 are supported"); - } + fastNlMeansDenoisingAbs(_src, _dst, std::vector(1, h), + templateWindowSize, searchWindowSize); } -void cv::fastNlMeansDenoisingAbs( InputArray _src, OutputArray _dst, float *h, +void cv::fastNlMeansDenoisingAbs( InputArray _src, OutputArray _dst, std::vector h, int templateWindowSize, int searchWindowSize) { + int hn = h.size(); + CV_Assert(hn == 1 || hn == CV_MAT_CN(_src.type())); + Size src_size = _src.size(); CV_OCL_RUN(_src.dims() <= 2 && (_src.isUMat() || _dst.isUMat()) && src_size.width > 5 && src_size.height > 5, // low accuracy on small sizes - ocl_fastNlMeansDenoising(_src, _dst, h, CV_MAT_CN(_src.type()), + ocl_fastNlMeansDenoising(_src, _dst, &h[0], hn, templateWindowSize, searchWindowSize, true)) Mat src = _src.getMat(); @@ -211,43 +141,73 @@ void cv::fastNlMeansDenoisingAbs( InputArray _src, OutputArray _dst, float *h, switch (src.type()) { case CV_8U: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, h)); + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_8UC2: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, h)); + if (hn == 1) + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, 
searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_8UC3: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, h)); + if (hn == 1) + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_8UC4: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, h)); + if (hn == 1) + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_16U: parallel_for_(cv::Range(0, src.rows), FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, h)); + src, dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_16UC2: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, Vec2i>( - src, dst, templateWindowSize, searchWindowSize, h)); + if (hn == 1) + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, int>( + src, dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, Vec2i>( + src, dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_16UC3: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, Vec3i>( - src, dst, templateWindowSize, searchWindowSize, h)); + if (hn == 1) + parallel_for_(cv::Range(0, src.rows), + 
FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, int>( + src, dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, Vec3i>( + src, dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_16UC4: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, Vec4i>( - src, dst, templateWindowSize, searchWindowSize, h)); + if (hn == 1) + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, int>( + src, dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, Vec4i>( + src, dst, templateWindowSize, searchWindowSize, &h[0])); break; default: CV_Error(Error::StsBadArg, @@ -332,51 +292,14 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds int imgToDenoiseIndex, int temporalWindowSize, float h, int templateWindowSize, int searchWindowSize) { - std::vector srcImgs; - _srcImgs.getMatVector(srcImgs); - - fastNlMeansDenoisingMultiCheckPreconditions( - srcImgs, imgToDenoiseIndex, - temporalWindowSize, templateWindowSize, searchWindowSize); - - _dst.create(srcImgs[0].size(), srcImgs[0].type()); - Mat dst = _dst.getMat(); - - switch (srcImgs[0].type()) - { - case CV_8U: - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h)); - break; - case CV_8UC2: - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h)); - break; - case CV_8UC3: - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h)); - break; 
- case CV_8UC4: - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h)); - break; - default: - CV_Error(Error::StsBadArg, - "Unsupported image format! Only CV_8U, CV_8UC2, CV_8UC3 and CV_8UC4 are supported"); - } + fastNlMeansDenoisingMulti(_srcImgs, _dst, imgToDenoiseIndex, temporalWindowSize, + std::vector(1, h), templateWindowSize, searchWindowSize); } void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _dst, int imgToDenoiseIndex, int temporalWindowSize, - float *h, int templateWindowSize, int searchWindowSize) + std::vector h, + int templateWindowSize, int searchWindowSize) { std::vector srcImgs; _srcImgs.getMatVector(srcImgs); @@ -385,6 +308,9 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds srcImgs, imgToDenoiseIndex, temporalWindowSize, templateWindowSize, searchWindowSize); + int hn = h.size(); + CV_Assert(hn == 1 || hn == CV_MAT_CN(srcImgs[0].type())); + _dst.create(srcImgs[0].size(), srcImgs[0].type()); Mat dst = _dst.getMat(); @@ -392,27 +318,45 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds { case CV_8U: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_8UC2: - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + if (hn == 1) + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); + else + 
parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_8UC3: - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + if (hn == 1) + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_8UC4: - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + if (hn == 1) + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); break; default: CV_Error(Error::StsBadArg, @@ -424,75 +368,14 @@ void cv::fastNlMeansDenoisingMultiAbs( InputArrayOfArrays _srcImgs, OutputArray int imgToDenoiseIndex, int temporalWindowSize, float h, int templateWindowSize, int searchWindowSize) { - std::vector srcImgs; - _srcImgs.getMatVector(srcImgs); - - fastNlMeansDenoisingMultiCheckPreconditions( - srcImgs, imgToDenoiseIndex, - temporalWindowSize, templateWindowSize, searchWindowSize); - - _dst.create(srcImgs[0].size(), srcImgs[0].type()); - Mat dst = _dst.getMat(); - - switch (srcImgs[0].type()) - { - case CV_8U: - parallel_for_(cv::Range(0, 
srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h)); - break; - case CV_8UC2: - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h)); - break; - case CV_8UC3: - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h)); - break; - case CV_8UC4: - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h)); - break; - case CV_16U: - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h)); - break; - case CV_16UC2: - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, int>( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h)); - break; - case CV_16UC3: - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, int>( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h)); - break; - case CV_16UC4: - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, int>( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h)); - break; - default: - CV_Error(Error::StsBadArg, - "Unsupported image format! 
Only CV_8U, CV_8UC2, CV_8UC3, CV_8UC4, CV_16U, CV_16UC2, CV_16UC3 and CV_16UC4 are supported"); - } + fastNlMeansDenoisingMulti(_srcImgs, _dst, imgToDenoiseIndex, temporalWindowSize, + std::vector(1, h), templateWindowSize, searchWindowSize); } void cv::fastNlMeansDenoisingMultiAbs( InputArrayOfArrays _srcImgs, OutputArray _dst, int imgToDenoiseIndex, int temporalWindowSize, - float *h, int templateWindowSize, int searchWindowSize) + std::vector h, + int templateWindowSize, int searchWindowSize) { std::vector srcImgs; _srcImgs.getMatVector(srcImgs); @@ -501,6 +384,9 @@ void cv::fastNlMeansDenoisingMultiAbs( InputArrayOfArrays _srcImgs, OutputArray srcImgs, imgToDenoiseIndex, temporalWindowSize, templateWindowSize, searchWindowSize); + int hn = h.size(); + CV_Assert(hn == 1 || hn == CV_MAT_CN(srcImgs[0].type())); + _dst.create(srcImgs[0].size(), srcImgs[0].type()); Mat dst = _dst.getMat(); @@ -508,51 +394,87 @@ void cv::fastNlMeansDenoisingMultiAbs( InputArrayOfArrays _srcImgs, OutputArray { case CV_8U: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_8UC2: - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + if (hn == 1) + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_8UC3: - parallel_for_(cv::Range(0, 
srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + if (hn == 1) + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_8UC4: - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + if (hn == 1) + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_16U: parallel_for_(cv::Range(0, srcImgs[0].rows), FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_16UC2: - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, Vec2i>( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + if (hn == 1) + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, int>( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, Vec2i>( + 
srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_16UC3: - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, Vec3i>( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + if (hn == 1) + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, int>( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, Vec3i>( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_16UC4: - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, Vec4i>( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, h)); + if (hn == 1) + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, int>( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, Vec4i>( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); break; default: CV_Error(Error::StsBadArg, diff --git a/modules/photo/test/ocl/test_denoising.cpp b/modules/photo/test/ocl/test_denoising.cpp index 3b6998f06..360c16296 100644 --- a/modules/photo/test/ocl/test_denoising.cpp +++ b/modules/photo/test/ocl/test_denoising.cpp @@ -16,7 +16,7 @@ namespace ocl { PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool, bool) { int cn, templateWindowSize, searchWindowSize; - float h[4]; + std::vector h; bool use_roi, use_image; 
TEST_DECLARE_INPUT_PARAMETER(src); @@ -31,7 +31,7 @@ PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool, bool) templateWindowSize = 7; searchWindowSize = 21; - ASSERT_TRUE(cn > 0 && cn <= 4); + h.resize(cn); for (int i=0; i 0 && cn <= 4); if (cn == 2) { int from_to[] = { 0,0, 1,1 }; src_roi.create(roiSize, type); From a594a0677afb7106791cfaf2e2d129fa0690d426 Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Mon, 9 Mar 2015 16:00:24 +0100 Subject: [PATCH 31/40] Cleanup --- modules/photo/src/denoising.cpp | 72 ++++++++++++++++----------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/modules/photo/src/denoising.cpp b/modules/photo/src/denoising.cpp index 7251b6446..c42ac4567 100644 --- a/modules/photo/src/denoising.cpp +++ b/modules/photo/src/denoising.cpp @@ -82,31 +82,31 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, std::vector( + FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, &h[0])); else parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( + FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_8UC3: if (hn == 1) parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( + FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, &h[0])); else parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( + FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_8UC4: if (hn == 1) parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( + FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, &h[0])); else parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( + FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, &h[0])); break; default: @@ -147,31 +147,31 @@ void cv::fastNlMeansDenoisingAbs( InputArray _src, OutputArray _dst, std::vector case CV_8UC2: if (hn == 1) 
parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( + FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, &h[0])); else parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( + FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_8UC3: if (hn == 1) parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( + FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, &h[0])); else parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( + FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_8UC4: if (hn == 1) parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( + FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, &h[0])); else parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( + FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_16U: @@ -182,31 +182,31 @@ void cv::fastNlMeansDenoisingAbs( InputArray _src, OutputArray _dst, std::vector case CV_16UC2: if (hn == 1) parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, int>( + FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, int>( src, dst, templateWindowSize, searchWindowSize, &h[0])); else parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, Vec2i>( + FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, Vec2i>( src, dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_16UC3: if (hn == 1) parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, int>( + FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, int>( src, dst, templateWindowSize, searchWindowSize, &h[0])); else parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, Vec3i>( + 
FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, Vec3i>( src, dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_16UC4: if (hn == 1) parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, int>( + FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, int>( src, dst, templateWindowSize, searchWindowSize, &h[0])); else parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, Vec4i>( + FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, Vec4i>( src, dst, templateWindowSize, searchWindowSize, &h[0])); break; default: @@ -325,36 +325,36 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds case CV_8UC2: if (hn == 1) parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, &h[0])); else parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_8UC3: if (hn == 1) parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, &h[0])); else parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_8UC4: if (hn == 1) parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, &h[0])); else parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + 
FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, &h[0])); break; @@ -401,36 +401,36 @@ void cv::fastNlMeansDenoisingMultiAbs( InputArrayOfArrays _srcImgs, OutputArray case CV_8UC2: if (hn == 1) parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, &h[0])); else parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_8UC3: if (hn == 1) parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, &h[0])); else parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_8UC4: if (hn == 1) parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, &h[0])); else parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, &h[0])); break; @@ -443,36 +443,36 @@ void cv::fastNlMeansDenoisingMultiAbs( InputArrayOfArrays _srcImgs, OutputArray case CV_16UC2: if (hn == 1) parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, int>( + FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, int>( srcImgs, imgToDenoiseIndex, 
temporalWindowSize, dst, templateWindowSize, searchWindowSize, &h[0])); else parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, Vec2i>( + FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, Vec2i>( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_16UC3: if (hn == 1) parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, int>( + FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, int>( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, &h[0])); else parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, Vec3i>( + FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, Vec3i>( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, &h[0])); break; case CV_16UC4: if (hn == 1) parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, int>( + FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, int>( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, &h[0])); else parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, Vec4i>( + FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, Vec4i>( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, &h[0])); break; From 4b5753daea25f0ba439d7f82f7d320d9ff743d8a Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Mon, 9 Mar 2015 16:11:18 +0100 Subject: [PATCH 32/40] Corrected documentation --- modules/photo/include/opencv2/photo.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/photo/include/opencv2/photo.hpp b/modules/photo/include/opencv2/photo.hpp index d613c2420..1867d3ef0 100644 --- 
a/modules/photo/include/opencv2/photo.hpp +++ b/modules/photo/include/opencv2/photo.hpp @@ -150,7 +150,7 @@ Should be odd. Recommended value 7 pixels given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater denoising time. Recommended value 21 pixels @param h Array of parameters regulating filter strength, either one -parameter applied to all channels or one per channel in src. Big h value +parameter applied to all channels or one per channel in dst. Big h value perfectly removes noise but also removes image details, smaller h value preserves details but also preserves some noise @@ -202,7 +202,7 @@ Should be odd. Recommended value 7 pixels given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater denoising time. Recommended value 21 pixels @param h Array of parameters regulating filter strength, either one -parameter applied to all channels or one per channel in src. Big h value +parameter applied to all channels or one per channel in dst. Big h value perfectly removes noise but also removes image details, smaller h value preserves details but also preserves some noise @@ -284,7 +284,7 @@ Should be odd. Recommended value 7 pixels given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater denoising time. Recommended value 21 pixels @param h Array of parameters regulating filter strength, either one -parameter applied to all channels or one per channel in src. Big h value +parameter applied to all channels or one per channel in dst. Big h value perfectly removes noise but also removes image details, smaller h value preserves details but also preserves some noise */ @@ -347,7 +347,7 @@ Should be odd. Recommended value 7 pixels given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater denoising time. 
Recommended value 21 pixels @param h Array of parameters regulating filter strength, either one -parameter applied to all channels or one per channel in src. Big h value +parameter applied to all channels or one per channel in dst. Big h value perfectly removes noise but also removes image details, smaller h value preserves details but also preserves some noise */ From b471f9ee2622641426e7e71978ab9e1ba181841b Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Mon, 9 Mar 2015 18:52:25 +0100 Subject: [PATCH 33/40] Fixed call to tegra::fastNlMeansDenoising --- modules/photo/src/denoising.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/photo/src/denoising.cpp b/modules/photo/src/denoising.cpp index c42ac4567..5445d26cd 100644 --- a/modules/photo/src/denoising.cpp +++ b/modules/photo/src/denoising.cpp @@ -69,7 +69,7 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, std::vector Date: Mon, 9 Mar 2015 23:47:49 +0100 Subject: [PATCH 34/40] Changed parameter type of fastNlMeansDenoising[Multi][Abs] from std::vector to const std::vector& --- modules/photo/include/opencv2/photo.hpp | 14 ++++++++------ modules/photo/src/denoising.cpp | 8 ++++---- .../photo/src/fast_nlmeans_denoising_opencl.hpp | 4 ++-- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/modules/photo/include/opencv2/photo.hpp b/modules/photo/include/opencv2/photo.hpp index 1867d3ef0..ff98ba74f 100644 --- a/modules/photo/include/opencv2/photo.hpp +++ b/modules/photo/include/opencv2/photo.hpp @@ -160,8 +160,9 @@ image in different colorspaces. Such approach is used in fastNlMeansDenoisingCol image to CIELAB colorspace and then separately denoise L and AB components with different h parameter. 
*/ -CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst, std::vector h, - int templateWindowSize = 7, int searchWindowSize = 21); + CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst, + const std::vector& h, + int templateWindowSize = 7, int searchWindowSize = 21); /** @brief Perform image denoising using Non-local Means Denoising algorithm @@ -212,8 +213,9 @@ image in different colorspaces. Such approach is used in fastNlMeansDenoisingCol image to CIELAB colorspace and then separately denoise L and AB components with different h parameter. */ -CV_EXPORTS_W void fastNlMeansDenoisingAbs( InputArray src, OutputArray dst, std::vector h, - int templateWindowSize = 7, int searchWindowSize = 21); +CV_EXPORTS_W void fastNlMeansDenoisingAbs( InputArray src, OutputArray dst, + const std::vector& h, + int templateWindowSize = 7, int searchWindowSize = 21); /** @brief Modification of fastNlMeansDenoising function for colored images @@ -290,7 +292,7 @@ value preserves details but also preserves some noise */ CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst, int imgToDenoiseIndex, int temporalWindowSize, - std::vector h , int templateWindowSize = 7, int searchWindowSize = 21); + const std::vector& h , int templateWindowSize = 7, int searchWindowSize = 21); /** @brief Modification of fastNlMeansDenoising function for images sequence where consequtive images have been captured in small period @@ -353,7 +355,7 @@ value preserves details but also preserves some noise */ CV_EXPORTS_W void fastNlMeansDenoisingMultiAbs( InputArrayOfArrays srcImgs, OutputArray dst, int imgToDenoiseIndex, int temporalWindowSize, - std::vector h, int templateWindowSize = 7, int searchWindowSize = 21); + const std::vector& h, int templateWindowSize = 7, int searchWindowSize = 21); /** @brief Modification of fastNlMeansDenoisingMulti function for colored images sequences diff --git a/modules/photo/src/denoising.cpp 
b/modules/photo/src/denoising.cpp index 7dde96081..5243b4330 100644 --- a/modules/photo/src/denoising.cpp +++ b/modules/photo/src/denoising.cpp @@ -52,7 +52,7 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h, templateWindowSize, searchWindowSize); } -void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, std::vector h, +void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, const std::vector& h, int templateWindowSize, int searchWindowSize) { int hn = h.size(); @@ -123,7 +123,7 @@ void cv::fastNlMeansDenoisingAbs( InputArray _src, OutputArray _dst, float h, templateWindowSize, searchWindowSize); } -void cv::fastNlMeansDenoisingAbs( InputArray _src, OutputArray _dst, std::vector h, +void cv::fastNlMeansDenoisingAbs( InputArray _src, OutputArray _dst, const std::vector& h, int templateWindowSize, int searchWindowSize) { int hn = h.size(); @@ -299,7 +299,7 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _dst, int imgToDenoiseIndex, int temporalWindowSize, - std::vector h, + const std::vector& h, int templateWindowSize, int searchWindowSize) { std::vector srcImgs; @@ -375,7 +375,7 @@ void cv::fastNlMeansDenoisingMultiAbs( InputArrayOfArrays _srcImgs, OutputArray void cv::fastNlMeansDenoisingMultiAbs( InputArrayOfArrays _srcImgs, OutputArray _dst, int imgToDenoiseIndex, int temporalWindowSize, - std::vector h, + const std::vector& h, int templateWindowSize, int searchWindowSize) { std::vector srcImgs; diff --git a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp index a06dc6192..b7fdc7cf9 100644 --- a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp @@ -29,7 +29,7 @@ static int divUp(int a, int b) } template -static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindowSize, 
int templateWindowSize, FT *h, int hn, int cn, +static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindowSize, int templateWindowSize, const FT *h, int hn, int cn, int & almostTemplateWindowSizeSqBinShift, bool abs) { const WT maxEstimateSumValue = searchWindowSize * searchWindowSize * @@ -78,7 +78,7 @@ static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindow return k.run(1, globalsize, NULL, false); } -static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float *h, int hn, +static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, const float *h, int hn, int templateWindowSize, int searchWindowSize, bool abs) { int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); From 812edb5fdc8eaa72151994280fb673a0d48fb62b Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Tue, 10 Mar 2015 01:34:02 +0100 Subject: [PATCH 35/40] Fixed bug, maxDist() should reurn int, not double --- modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp index 9833ea7d3..8f31e8b02 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp @@ -176,7 +176,7 @@ public: } template - static inline double maxDist() + static inline int maxDist() { return (int)pixelInfo::sampleMax() * pixelInfo::channels; } @@ -298,7 +298,7 @@ public: } template - static inline double maxDist() + static inline int maxDist() { return (int)pixelInfo::sampleMax() * (int)pixelInfo::sampleMax() * pixelInfo::channels; From 82c54104d6901e03027240cd9c6866f6b2509d0a Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Tue, 10 Mar 2015 01:39:43 +0100 Subject: [PATCH 36/40] Fix warnings on Win x64 --- modules/photo/src/denoising.cpp | 8 ++++---- 1 file changed, 
4 insertions(+), 4 deletions(-) diff --git a/modules/photo/src/denoising.cpp b/modules/photo/src/denoising.cpp index 5243b4330..fb3889339 100644 --- a/modules/photo/src/denoising.cpp +++ b/modules/photo/src/denoising.cpp @@ -55,7 +55,7 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h, void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, const std::vector& h, int templateWindowSize, int searchWindowSize) { - int hn = h.size(); + int hn = (int)h.size(); CV_Assert(hn == 1 || hn == CV_MAT_CN(_src.type())); Size src_size = _src.size(); @@ -126,7 +126,7 @@ void cv::fastNlMeansDenoisingAbs( InputArray _src, OutputArray _dst, float h, void cv::fastNlMeansDenoisingAbs( InputArray _src, OutputArray _dst, const std::vector& h, int templateWindowSize, int searchWindowSize) { - int hn = h.size(); + int hn = (int)h.size(); CV_Assert(hn == 1 || hn == CV_MAT_CN(_src.type())); Size src_size = _src.size(); @@ -309,7 +309,7 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds srcImgs, imgToDenoiseIndex, temporalWindowSize, templateWindowSize, searchWindowSize); - int hn = h.size(); + int hn = (int)h.size(); CV_Assert(hn == 1 || hn == CV_MAT_CN(srcImgs[0].type())); _dst.create(srcImgs[0].size(), srcImgs[0].type()); @@ -385,7 +385,7 @@ void cv::fastNlMeansDenoisingMultiAbs( InputArrayOfArrays _srcImgs, OutputArray srcImgs, imgToDenoiseIndex, temporalWindowSize, templateWindowSize, searchWindowSize); - int hn = h.size(); + int hn = (int)h.size(); CV_Assert(hn == 1 || hn == CV_MAT_CN(srcImgs[0].type())); _dst.create(srcImgs[0].size(), srcImgs[0].type()); From 9fff7896c528b44df92fe63b4bde59f85f98e1be Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Mon, 23 Mar 2015 23:23:35 +0100 Subject: [PATCH 37/40] Removed fastNlMeansDenoisingColored[Multi]Abs --- modules/photo/include/opencv2/photo.hpp | 118 +----------- modules/photo/src/denoising.cpp | 218 ---------------------- modules/photo/test/ocl/test_denoising.cpp | 34 
---- 3 files changed, 1 insertion(+), 369 deletions(-) diff --git a/modules/photo/include/opencv2/photo.hpp b/modules/photo/include/opencv2/photo.hpp index ff98ba74f..85336c936 100644 --- a/modules/photo/include/opencv2/photo.hpp +++ b/modules/photo/include/opencv2/photo.hpp @@ -160,63 +160,10 @@ image in different colorspaces. Such approach is used in fastNlMeansDenoisingCol image to CIELAB colorspace and then separately denoise L and AB components with different h parameter. */ - CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst, +CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst, const std::vector& h, int templateWindowSize = 7, int searchWindowSize = 21); -/** @brief Perform image denoising using Non-local Means Denoising -algorithm -with several computational optimizations. Noise expected to be a -gaussian white noise. Uses squared sum of absolute value distances -instead of sum of squared distances for weight calculation - -@param src Input 8-bit or 16-bit 1-channel, 2-channel, 3-channel or 4-channel image. -@param dst Output image with the same size and type as src . -@param templateWindowSize Size in pixels of the template patch that is used to compute weights. -Should be odd. Recommended value 7 pixels -@param searchWindowSize Size in pixels of the window that is used to compute weighted average for -given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater -denoising time. Recommended value 21 pixels -@param h Parameter regulating filter strength. Big h value perfectly removes noise but also -removes image details, smaller h value preserves details but also preserves some noise - -This function expected to be applied to grayscale images. For colored images look at -fastNlMeansDenoisingColored. Advanced usage of this functions can be manual denoising of colored -image in different colorspaces. 
Such approach is used in fastNlMeansDenoisingColored by converting -image to CIELAB colorspace and then separately denoise L and AB components with different h -parameter. - */ -CV_EXPORTS_W void fastNlMeansDenoisingAbs( InputArray src, OutputArray dst, float h = 3, - int templateWindowSize = 7, int searchWindowSize = 21); - -/** @brief Perform image denoising using Non-local Means Denoising -algorithm -with several computational optimizations. Noise expected to be a -gaussian white noise. Uses squared sum of absolute value distances -instead of sum of squared distances for weight calculation - -@param src Input 8-bit or 16-bit 1-channel, 2-channel, 3-channel or 4-channel image. -@param dst Output image with the same size and type as src . -@param templateWindowSize Size in pixels of the template patch that is used to compute weights. -Should be odd. Recommended value 7 pixels -@param searchWindowSize Size in pixels of the window that is used to compute weighted average for -given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater -denoising time. Recommended value 21 pixels -@param h Array of parameters regulating filter strength, either one -parameter applied to all channels or one per channel in dst. Big h value -perfectly removes noise but also removes image details, smaller h -value preserves details but also preserves some noise - -This function expected to be applied to grayscale images. For colored images look at -fastNlMeansDenoisingColored. Advanced usage of this functions can be manual denoising of colored -image in different colorspaces. Such approach is used in fastNlMeansDenoisingColored by converting -image to CIELAB colorspace and then separately denoise L and AB components with different h -parameter. 
- */ -CV_EXPORTS_W void fastNlMeansDenoisingAbs( InputArray src, OutputArray dst, - const std::vector& h, - int templateWindowSize = 7, int searchWindowSize = 21); - /** @brief Modification of fastNlMeansDenoising function for colored images @param src Input 8-bit 3-channel image. @@ -294,69 +241,6 @@ CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputA int imgToDenoiseIndex, int temporalWindowSize, const std::vector& h , int templateWindowSize = 7, int searchWindowSize = 21); -/** @brief Modification of fastNlMeansDenoising function for images -sequence where consequtive images have been captured in small period -of time. For example video. This version of the function is for -grayscale images or for manual manipulation with colorspaces. For more -details see -. Uses -squared sum of absolute value distances instead of sum of squared -distances for weight calculation - -@param srcImgs Input 8-bit or 16-bit 1-channel, 2-channel, 3-channel -or 4-channel images sequence. All images should have the same type and -size. -@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence -@param temporalWindowSize Number of surrounding images to use for target image denoising. Should -be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to -imgToDenoiseIndex - temporalWindowSize / 2 from srcImgs will be used to denoise -srcImgs[imgToDenoiseIndex] image. -@param dst Output image with the same size and type as srcImgs images. -@param templateWindowSize Size in pixels of the template patch that is used to compute weights. -Should be odd. Recommended value 7 pixels -@param searchWindowSize Size in pixels of the window that is used to compute weighted average for -given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater -denoising time. Recommended value 21 pixels -@param h Parameter regulating filter strength. 
Bigger h value -perfectly removes noise but also removes image details, smaller h -value preserves details but also preserves some noise - */ -CV_EXPORTS_W void fastNlMeansDenoisingMultiAbs( InputArrayOfArrays srcImgs, OutputArray dst, - int imgToDenoiseIndex, int temporalWindowSize, - float h = 3, int templateWindowSize = 7, int searchWindowSize = 21); - -/** @brief Modification of fastNlMeansDenoising function for images -sequence where consequtive images have been captured in small period -of time. For example video. This version of the function is for -grayscale images or for manual manipulation with colorspaces. For more -details see -. Uses -squared sum of absolute value distances instead of sum of squared -distances for weight calculation - -@param srcImgs Input 8-bit or 16-bit 1-channel, 2-channel, 3-channel -or 4-channel images sequence. All images should have the same type and -size. -@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence -@param temporalWindowSize Number of surrounding images to use for target image denoising. Should -be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to -imgToDenoiseIndex - temporalWindowSize / 2 from srcImgs will be used to denoise -srcImgs[imgToDenoiseIndex] image. -@param dst Output image with the same size and type as srcImgs images. -@param templateWindowSize Size in pixels of the template patch that is used to compute weights. -Should be odd. Recommended value 7 pixels -@param searchWindowSize Size in pixels of the window that is used to compute weighted average for -given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater -denoising time. Recommended value 21 pixels -@param h Array of parameters regulating filter strength, either one -parameter applied to all channels or one per channel in dst. 
Big h value -perfectly removes noise but also removes image details, smaller h -value preserves details but also preserves some noise - */ -CV_EXPORTS_W void fastNlMeansDenoisingMultiAbs( InputArrayOfArrays srcImgs, OutputArray dst, - int imgToDenoiseIndex, int temporalWindowSize, - const std::vector& h, int templateWindowSize = 7, int searchWindowSize = 21); - /** @brief Modification of fastNlMeansDenoisingMulti function for colored images sequences @param srcImgs Input 8-bit 3-channel images sequence. All images should have the same type and diff --git a/modules/photo/src/denoising.cpp b/modules/photo/src/denoising.cpp index fb3889339..cd8a751f6 100644 --- a/modules/photo/src/denoising.cpp +++ b/modules/photo/src/denoising.cpp @@ -116,106 +116,6 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, const std::vec } } -void cv::fastNlMeansDenoisingAbs( InputArray _src, OutputArray _dst, float h, - int templateWindowSize, int searchWindowSize) -{ - fastNlMeansDenoisingAbs(_src, _dst, std::vector(1, h), - templateWindowSize, searchWindowSize); -} - -void cv::fastNlMeansDenoisingAbs( InputArray _src, OutputArray _dst, const std::vector& h, - int templateWindowSize, int searchWindowSize) -{ - int hn = (int)h.size(); - CV_Assert(hn == 1 || hn == CV_MAT_CN(_src.type())); - - Size src_size = _src.size(); - CV_OCL_RUN(_src.dims() <= 2 && (_src.isUMat() || _dst.isUMat()) && - src_size.width > 5 && src_size.height > 5, // low accuracy on small sizes - ocl_fastNlMeansDenoising(_src, _dst, &h[0], hn, - templateWindowSize, searchWindowSize, true)) - - Mat src = _src.getMat(); - _dst.create(src_size, src.type()); - Mat dst = _dst.getMat(); - - switch (src.type()) { - case CV_8U: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, &h[0])); - break; - case CV_8UC2: - if (hn == 1) - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, 
searchWindowSize, &h[0])); - else - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, &h[0])); - break; - case CV_8UC3: - if (hn == 1) - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, &h[0])); - else - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, &h[0])); - break; - case CV_8UC4: - if (hn == 1) - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, &h[0])); - else - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, &h[0])); - break; - case CV_16U: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, &h[0])); - break; - case CV_16UC2: - if (hn == 1) - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, int>( - src, dst, templateWindowSize, searchWindowSize, &h[0])); - else - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, Vec2i>( - src, dst, templateWindowSize, searchWindowSize, &h[0])); - break; - case CV_16UC3: - if (hn == 1) - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, int>( - src, dst, templateWindowSize, searchWindowSize, &h[0])); - else - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, Vec3i>( - src, dst, templateWindowSize, searchWindowSize, &h[0])); - break; - case CV_16UC4: - if (hn == 1) - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, int>( - src, dst, templateWindowSize, searchWindowSize, &h[0])); - else - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker, int64, uint64, DistAbs, Vec4i>( - src, dst, 
templateWindowSize, searchWindowSize, &h[0])); - break; - default: - CV_Error(Error::StsBadArg, - "Unsupported image format! Only CV_8U, CV_8UC2, CV_8UC3, CV_8UC4, CV_16U, CV_16UC2, CV_16UC3 and CV_16UC4 are supported"); - } -} - void cv::fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst, float h, float hForColorComponents, int templateWindowSize, int searchWindowSize) @@ -365,124 +265,6 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds } } -void cv::fastNlMeansDenoisingMultiAbs( InputArrayOfArrays _srcImgs, OutputArray _dst, - int imgToDenoiseIndex, int temporalWindowSize, - float h, int templateWindowSize, int searchWindowSize) -{ - fastNlMeansDenoisingMulti(_srcImgs, _dst, imgToDenoiseIndex, temporalWindowSize, - std::vector(1, h), templateWindowSize, searchWindowSize); -} - -void cv::fastNlMeansDenoisingMultiAbs( InputArrayOfArrays _srcImgs, OutputArray _dst, - int imgToDenoiseIndex, int temporalWindowSize, - const std::vector& h, - int templateWindowSize, int searchWindowSize) -{ - std::vector srcImgs; - _srcImgs.getMatVector(srcImgs); - - fastNlMeansDenoisingMultiCheckPreconditions( - srcImgs, imgToDenoiseIndex, - temporalWindowSize, templateWindowSize, searchWindowSize); - - int hn = (int)h.size(); - CV_Assert(hn == 1 || hn == CV_MAT_CN(srcImgs[0].type())); - - _dst.create(srcImgs[0].size(), srcImgs[0].type()); - Mat dst = _dst.getMat(); - - switch (srcImgs[0].type()) - { - case CV_8U: - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h[0])); - break; - case CV_8UC2: - if (hn == 1) - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h[0])); - else - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, 
temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h[0])); - break; - case CV_8UC3: - if (hn == 1) - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h[0])); - else - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h[0])); - break; - case CV_8UC4: - if (hn == 1) - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h[0])); - else - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h[0])); - break; - case CV_16U: - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h[0])); - break; - case CV_16UC2: - if (hn == 1) - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, int>( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h[0])); - else - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, Vec2i>( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h[0])); - break; - case CV_16UC3: - if (hn == 1) - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, int>( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h[0])); - else - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, Vec3i>( - 
srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h[0])); - break; - case CV_16UC4: - if (hn == 1) - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, int>( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h[0])); - else - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker, int64, uint64, DistAbs, Vec4i>( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h[0])); - break; - default: - CV_Error(Error::StsBadArg, - "Unsupported image format! Only CV_8U, CV_8UC2, CV_8UC3, CV_8UC4, CV_16U, CV_16UC2, CV_16UC3 and CV_16UC4 are supported"); - } -} - void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputArray _dst, int imgToDenoiseIndex, int temporalWindowSize, float h, float hForColorComponents, diff --git a/modules/photo/test/ocl/test_denoising.cpp b/modules/photo/test/ocl/test_denoising.cpp index 360c16296..55b5a9e59 100644 --- a/modules/photo/test/ocl/test_denoising.cpp +++ b/modules/photo/test/ocl/test_denoising.cpp @@ -103,36 +103,6 @@ OCL_TEST_P(FastNlMeansDenoising_hsep, Mat) } } -typedef FastNlMeansDenoisingTestBase FastNlMeansDenoisingAbs; - -OCL_TEST_P(FastNlMeansDenoisingAbs, Mat) -{ - for (int j = 0; j < test_loop_times; j++) - { - generateTestData(); - - OCL_OFF(cv::fastNlMeansDenoisingAbs(src_roi, dst_roi, h[0], templateWindowSize, searchWindowSize)); - OCL_ON(cv::fastNlMeansDenoisingAbs(usrc_roi, udst_roi, h[0], templateWindowSize, searchWindowSize)); - - OCL_EXPECT_MATS_NEAR(dst, 1); - } -} - -typedef FastNlMeansDenoisingTestBase FastNlMeansDenoisingAbs_hsep; - -OCL_TEST_P(FastNlMeansDenoisingAbs_hsep, Mat) -{ - for (int j = 0; j < test_loop_times; j++) - { - generateTestData(); - - OCL_OFF(cv::fastNlMeansDenoisingAbs(src_roi, dst_roi, h, templateWindowSize, searchWindowSize)); - 
OCL_ON(cv::fastNlMeansDenoisingAbs(usrc_roi, udst_roi, h, templateWindowSize, searchWindowSize)); - - OCL_EXPECT_MATS_NEAR(dst, 1); - } -} - typedef FastNlMeansDenoisingTestBase FastNlMeansDenoisingColored; OCL_TEST_P(FastNlMeansDenoisingColored, Mat) @@ -152,10 +122,6 @@ OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising, Combine(Values(1, 2, 3, 4), Bool(), Values(true))); OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising_hsep, Combine(Values(1, 2, 3, 4), Bool(), Values(true))); -OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoisingAbs, - Combine(Values(1, 2, 3, 4), Bool(), Values(true))); -OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoisingAbs_hsep, - Combine(Values(1, 2, 3, 4), Bool(), Values(true))); OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoisingColored, Combine(Values(3, 4), Bool(), Values(false))); From 5f8d688664b3a1466f587a0be324463149347909 Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Tue, 24 Mar 2015 00:47:11 +0100 Subject: [PATCH 38/40] Added parameter normType to fastNlMeansDenoising --- modules/photo/include/opencv2/photo.hpp | 4 +- modules/photo/src/denoising.cpp | 127 ++++++++++++------ .../src/fast_nlmeans_denoising_opencl.hpp | 29 ++-- 3 files changed, 103 insertions(+), 57 deletions(-) diff --git a/modules/photo/include/opencv2/photo.hpp b/modules/photo/include/opencv2/photo.hpp index 85336c936..0a42424cb 100644 --- a/modules/photo/include/opencv2/photo.hpp +++ b/modules/photo/include/opencv2/photo.hpp @@ -153,6 +153,7 @@ denoising time. Recommended value 21 pixels parameter applied to all channels or one per channel in dst. Big h value perfectly removes noise but also removes image details, smaller h value preserves details but also preserves some noise +@param normType Type of norm used for weight calcluation. Can be either NORM_L2 or NORM_L1 This function expected to be applied to grayscale images. For colored images look at fastNlMeansDenoisingColored. 
Advanced usage of this functions can be manual denoising of colored @@ -162,7 +163,8 @@ parameter. */ CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst, const std::vector& h, - int templateWindowSize = 7, int searchWindowSize = 21); + int templateWindowSize = 7, int searchWindowSize = 21, + int normType = NORM_L2); /** @brief Modification of fastNlMeansDenoising function for colored images diff --git a/modules/photo/src/denoising.cpp b/modules/photo/src/denoising.cpp index cd8a751f6..4e7922e40 100644 --- a/modules/photo/src/denoising.cpp +++ b/modules/photo/src/denoising.cpp @@ -45,6 +45,54 @@ #include "fast_nlmeans_multi_denoising_invoker.hpp" #include "fast_nlmeans_denoising_opencl.hpp" +template +static void fastNlMeansDenoising_( const Mat& src, Mat& dst, const std::vector& h, + int templateWindowSize, int searchWindowSize) +{ + int hn = (int)h.size(); + + switch (CV_MAT_CN(src.type())) { + case 1: + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, &h[0])); + break; + case 2: + if (hn == 1) + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker, IT, UIT, D, int>( + src, dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker, IT, UIT, D, Vec2i>( + src, dst, templateWindowSize, searchWindowSize, &h[0])); + break; + case 3: + if (hn == 1) + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker, IT, UIT, D, int>( + src, dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker, IT, UIT, D, Vec3i>( + src, dst, templateWindowSize, searchWindowSize, &h[0])); + break; + case 4: + if (hn == 1) + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker, IT, UIT, D, int>( + src, dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, src.rows), + 
FastNlMeansDenoisingInvoker, IT, UIT, D, Vec4i>( + src, dst, templateWindowSize, searchWindowSize, &h[0])); + break; + default: + CV_Error(Error::StsBadArg, + "Unsupported number of channels! Only 1, 2, 3, and 4 are supported"); + } +} + void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h, int templateWindowSize, int searchWindowSize) { @@ -53,66 +101,59 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h, } void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, const std::vector& h, - int templateWindowSize, int searchWindowSize) + int templateWindowSize, int searchWindowSize, int normType) { - int hn = (int)h.size(); - CV_Assert(hn == 1 || hn == CV_MAT_CN(_src.type())); + int hn = (int)h.size(), type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + CV_Assert(hn == 1 || hn == cn); Size src_size = _src.size(); CV_OCL_RUN(_src.dims() <= 2 && (_src.isUMat() || _dst.isUMat()) && src_size.width > 5 && src_size.height > 5, // low accuracy on small sizes ocl_fastNlMeansDenoising(_src, _dst, &h[0], hn, - templateWindowSize, searchWindowSize, false)) + templateWindowSize, searchWindowSize, normType)) Mat src = _src.getMat(); _dst.create(src_size, src.type()); Mat dst = _dst.getMat(); + switch (normType) { + case NORM_L2: #ifdef HAVE_TEGRA_OPTIMIZATION - if(hn == 1 && tegra::useTegra() && - tegra::fastNlMeansDenoising(src, dst, h[0], templateWindowSize, searchWindowSize)) - return; + if(hn == 1 && tegra::useTegra() && + tegra::fastNlMeansDenoising(src, dst, h[0], templateWindowSize, searchWindowSize)) + return; #endif - - switch (src.type()) { - case CV_8U: - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, &h[0])); + switch (depth) { + case CV_8U: + fastNlMeansDenoising_(src, dst, h, + templateWindowSize, + searchWindowSize); + break; + default: + CV_Error(Error::StsBadArg, + "Unsupported depth! 
Only CV_8U is supported for NORM_L2"); + } break; - case CV_8UC2: - if (hn == 1) - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, &h[0])); - else - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, &h[0])); - break; - case CV_8UC3: - if (hn == 1) - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, &h[0])); - else - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, &h[0])); - break; - case CV_8UC4: - if (hn == 1) - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, &h[0])); - else - parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( - src, dst, templateWindowSize, searchWindowSize, &h[0])); + case NORM_L1: + switch (depth) { + case CV_8U: + fastNlMeansDenoising_(src, dst, h, + templateWindowSize, + searchWindowSize); + break; + case CV_16U: + fastNlMeansDenoising_(src, dst, h, + templateWindowSize, + searchWindowSize); + break; + default: + CV_Error(Error::StsBadArg, + "Unsupported depth! Only CV_8U and CV_16U are supported for NORM_L1"); + } break; default: CV_Error(Error::StsBadArg, - "Unsupported image format! Only CV_8U, CV_8UC2, CV_8UC3 and CV_8UC4 are supported"); + "Unsupported norm type! 
Only NORM_L2 and NORM_L1 are supported"); } } diff --git a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp index b7fdc7cf9..1c511f37b 100644 --- a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp @@ -29,8 +29,10 @@ static int divUp(int a, int b) } template -static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindowSize, int templateWindowSize, const FT *h, int hn, int cn, - int & almostTemplateWindowSizeSqBinShift, bool abs) +static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, + int searchWindowSize, int templateWindowSize, + const FT *h, int hn, int cn, int normType, + int & almostTemplateWindowSizeSqBinShift) { const WT maxEstimateSumValue = searchWindowSize * searchWindowSize * std::numeric_limits::max(); @@ -50,7 +52,7 @@ static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindow FT almostDist2ActualDistMultiplier = (FT)(1 << almostTemplateWindowSizeSqBinShift) / templateWindowSizeSq; const FT WEIGHT_THRESHOLD = 1e-3f; - int maxDist = abs ? std::numeric_limits::max() * cn : + int maxDist = normType == NORM_L1 ? std::numeric_limits::max() * cn : std::numeric_limits::max() * std::numeric_limits::max() * cn; int almostMaxDist = (int)(maxDist / almostDist2ActualDistMultiplier + 1); FT den[4]; @@ -66,7 +68,8 @@ static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindow " -D wlut_t=%s -D convert_wlut_t=%s%s%s", ocl::typeToStr(depth), ocl::typeToStr(CV_MAKE_TYPE(depth, hn)), ocl::typeToStr(CV_32SC(hn)), ocl::convertTypeStr(depth, CV_32S, hn, buf), - doubleSupport ? " -D DOUBLE_SUPPORT" : "", abs ? " -D ABS" : "")); + doubleSupport ? " -D DOUBLE_SUPPORT" : "", + normType == NORM_L1 ? 
" -D ABS" : "")); if (k.empty()) return false; @@ -79,13 +82,14 @@ static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindow } static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, const float *h, int hn, - int templateWindowSize, int searchWindowSize, bool abs) + int templateWindowSize, int searchWindowSize, int normType) { int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); int ctaSize = ocl::Device::getDefault().isIntel() ? CTA_SIZE_INTEL : CTA_SIZE_DEFAULT; Size size = _src.size(); - if (cn != 1 && cn != 2 && cn != 3 && cn != 4 && depth != CV_8U && (!abs || depth != CV_16U)) + if (cn < 1 || cn > 4 || ((normType != NORM_L2 || depth != CV_8U) && + (normType != NORM_L1 || (depth != CV_8U && depth != CV_16U)))) return false; int templateWindowHalfWize = templateWindowSize / 2; @@ -117,7 +121,8 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, const fl ctaSize, templateWindowHalfWize, searchWindowHalfSize, ocl::convertTypeStr(depth, CV_32S, cn, buf[2]), cn, (depth == CV_8U ? sizeof(uchar) : sizeof(ushort)) * (cn == 3 ? 4 : cn), - ocl::convertTypeStr(CV_32S, depth, cn, buf[3]), abs ? " -D ABS" : ""); + ocl::convertTypeStr(CV_32S, depth, cn, buf[3]), + normType == NORM_L1 ? 
" -D ABS" : ""); ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts); if (k.empty()) @@ -127,15 +132,13 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, const fl if ((depth == CV_8U && !ocl_calcAlmostDist2Weight(almostDist2Weight, searchWindowSize, templateWindowSize, - h, hn, cn, - almostTemplateWindowSizeSqBinShift, - abs)) || + h, hn, cn, normType, + almostTemplateWindowSizeSqBinShift)) || (depth == CV_16U && !ocl_calcAlmostDist2Weight(almostDist2Weight, searchWindowSize, templateWindowSize, - h, hn, cn, - almostTemplateWindowSizeSqBinShift, - abs))) + h, hn, cn, normType, + almostTemplateWindowSizeSqBinShift))) return false; CV_Assert(almostTemplateWindowSizeSqBinShift >= 0); From 70a64ebe728584e3223e6984ecc04998b043d405 Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Tue, 24 Mar 2015 01:16:17 +0100 Subject: [PATCH 39/40] Added test cases --- modules/photo/test/ocl/test_denoising.cpp | 25 +++++++++++++---------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/modules/photo/test/ocl/test_denoising.cpp b/modules/photo/test/ocl/test_denoising.cpp index 55b5a9e59..f749564c6 100644 --- a/modules/photo/test/ocl/test_denoising.cpp +++ b/modules/photo/test/ocl/test_denoising.cpp @@ -13,9 +13,9 @@ namespace cvtest { namespace ocl { -PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool, bool) +PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, int, bool, bool) { - int cn, templateWindowSize, searchWindowSize; + int cn, normType, templateWindowSize, searchWindowSize; std::vector h; bool use_roi, use_image; @@ -25,8 +25,9 @@ PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool, bool) virtual void SetUp() { cn = GET_PARAM(0); - use_roi = GET_PARAM(1); - use_image = GET_PARAM(2); + normType = GET_PARAM(1); + use_roi = GET_PARAM(2); + use_image = GET_PARAM(3); templateWindowSize = 7; searchWindowSize = 21; @@ -81,8 +82,8 @@ OCL_TEST_P(FastNlMeansDenoising, Mat) { generateTestData(); - 
OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, h[0], templateWindowSize, searchWindowSize)); - OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, h[0], templateWindowSize, searchWindowSize)); + OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, std::vector(1, h[0]), templateWindowSize, searchWindowSize, normType)); + OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, std::vector(1, h[0]), templateWindowSize, searchWindowSize, normType)); OCL_EXPECT_MATS_NEAR(dst, 1); } @@ -96,8 +97,8 @@ OCL_TEST_P(FastNlMeansDenoising_hsep, Mat) { generateTestData(); - OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, h, templateWindowSize, searchWindowSize)); - OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, h, templateWindowSize, searchWindowSize)); + OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, h, templateWindowSize, searchWindowSize, normType)); + OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, h, templateWindowSize, searchWindowSize, normType)); OCL_EXPECT_MATS_NEAR(dst, 1); } @@ -119,11 +120,13 @@ OCL_TEST_P(FastNlMeansDenoisingColored, Mat) } OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising, - Combine(Values(1, 2, 3, 4), Bool(), Values(true))); + Combine(Values(1, 2, 3, 4), Values((int)NORM_L2, (int)NORM_L1), + Bool(), Values(true))); OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising_hsep, - Combine(Values(1, 2, 3, 4), Bool(), Values(true))); + Combine(Values(1, 2, 3, 4), Values((int)NORM_L2, (int)NORM_L1), + Bool(), Values(true))); OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoisingColored, - Combine(Values(3, 4), Bool(), Values(false))); + Combine(Values(3, 4), Values((int)NORM_L2), Bool(), Values(false))); } } // namespace cvtest::ocl From 01d3df0d00e5c802108b90bd2dedb50e9a9ecacf Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Tue, 24 Mar 2015 02:01:31 +0100 Subject: [PATCH 40/40] Added normType parameter to fastNlMeansDenoisingMulti --- modules/photo/include/opencv2/photo.hpp | 18 +-- modules/photo/src/denoising.cpp | 141 
++++++++++++++++-------- 2 files changed, 107 insertions(+), 52 deletions(-) diff --git a/modules/photo/include/opencv2/photo.hpp b/modules/photo/include/opencv2/photo.hpp index 0a42424cb..c651b9ee3 100644 --- a/modules/photo/include/opencv2/photo.hpp +++ b/modules/photo/include/opencv2/photo.hpp @@ -142,7 +142,8 @@ CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst, float h with several computational optimizations. Noise expected to be a gaussian white noise -@param src Input 8-bit 1-channel, 2-channel, 3-channel or 4-channel image. +@param src Input 8-bit or 16-bit (only with NORM_L1) 1-channel, +2-channel, 3-channel or 4-channel image. @param dst Output image with the same size and type as src . @param templateWindowSize Size in pixels of the template patch that is used to compute weights. Should be odd. Recommended value 7 pixels @@ -153,7 +154,7 @@ denoising time. Recommended value 21 pixels parameter applied to all channels or one per channel in dst. Big h value perfectly removes noise but also removes image details, smaller h value preserves details but also preserves some noise -@param normType Type of norm used for weight calcluation. Can be either NORM_L2 or NORM_L1 +@param normType Type of norm used for weight calculation. Can be either NORM_L2 or NORM_L1 This function expected to be applied to grayscale images. For colored images look at fastNlMeansDenoisingColored. Advanced usage of this functions can be manual denoising of colored @@ -220,9 +221,9 @@ captured in small period of time. For example video. This version of the functio images or for manual manipulation with colorspaces. For more details see -@param srcImgs Input 8-bit 1-channel, 2-channel, 3-channel or -4-channel images sequence. All images should have the same type and -size. +@param srcImgs Input 8-bit or 16-bit (only with NORM_L1) 1-channel, +2-channel, 3-channel or 4-channel images sequence. All images should +have the same type and size. 
@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence @param temporalWindowSize Number of surrounding images to use for target image denoising. Should be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to @@ -238,10 +239,13 @@ denoising time. Recommended value 21 pixels parameter applied to all channels or one per channel in dst. Big h value perfectly removes noise but also removes image details, smaller h value preserves details but also preserves some noise +@param normType Type of norm used for weight calculation. Can be either NORM_L2 or NORM_L1 */ CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst, - int imgToDenoiseIndex, int temporalWindowSize, - const std::vector& h , int templateWindowSize = 7, int searchWindowSize = 21); + int imgToDenoiseIndex, int temporalWindowSize, + const std::vector& h, + int templateWindowSize = 7, int searchWindowSize = 21, + int normType = NORM_L2); /** @brief Modification of fastNlMeansDenoisingMulti function for colored images sequences diff --git a/modules/photo/src/denoising.cpp b/modules/photo/src/denoising.cpp index 4e7922e40..c68d09b92 100644 --- a/modules/photo/src/denoising.cpp +++ b/modules/photo/src/denoising.cpp @@ -230,6 +230,64 @@ static void fastNlMeansDenoisingMultiCheckPreconditions( } } +template +static void fastNlMeansDenoisingMulti_( const std::vector& srcImgs, Mat& dst, + int imgToDenoiseIndex, int temporalWindowSize, + const std::vector& h, + int templateWindowSize, int searchWindowSize) +{ + int hn = (int)h.size(); + + switch (srcImgs[0].type()) + { + case CV_8U: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); + break; + case CV_8UC2: + if (hn == 1) + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, IT, UIT, D, int>( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + 
dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, IT, UIT, D, Vec2i>( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); + break; + case CV_8UC3: + if (hn == 1) + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, IT, UIT, D, int>( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, IT, UIT, D, Vec3i>( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); + break; + case CV_8UC4: + if (hn == 1) + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, IT, UIT, D, int>( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); + else + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, IT, UIT, D, Vec4i>( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, &h[0])); + break; + default: + CV_Error(Error::StsBadArg, + "Unsupported image format! 
Only CV_8U, CV_8UC2, CV_8UC3 and CV_8UC4 are supported"); + } +} + void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _dst, int imgToDenoiseIndex, int temporalWindowSize, float h, int templateWindowSize, int searchWindowSize) @@ -241,7 +299,7 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _dst, int imgToDenoiseIndex, int temporalWindowSize, const std::vector& h, - int templateWindowSize, int searchWindowSize) + int templateWindowSize, int searchWindowSize, int normType) { std::vector srcImgs; _srcImgs.getMatVector(srcImgs); @@ -251,58 +309,51 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds temporalWindowSize, templateWindowSize, searchWindowSize); int hn = (int)h.size(); - CV_Assert(hn == 1 || hn == CV_MAT_CN(srcImgs[0].type())); + int type = srcImgs[0].type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + CV_Assert(hn == 1 || hn == cn); _dst.create(srcImgs[0].size(), srcImgs[0].type()); Mat dst = _dst.getMat(); - switch (srcImgs[0].type()) - { - case CV_8U: - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h[0])); + switch (normType) { + case NORM_L2: + switch (depth) { + case CV_8U: + fastNlMeansDenoisingMulti_(srcImgs, dst, + imgToDenoiseIndex, temporalWindowSize, + h, + templateWindowSize, searchWindowSize); + break; + default: + CV_Error(Error::StsBadArg, + "Unsupported depth! 
Only CV_8U is supported for NORM_L2"); + } break; - case CV_8UC2: - if (hn == 1) - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h[0])); - else - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h[0])); - break; - case CV_8UC3: - if (hn == 1) - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h[0])); - else - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h[0])); - break; - case CV_8UC4: - if (hn == 1) - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h[0])); - else - parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( - srcImgs, imgToDenoiseIndex, temporalWindowSize, - dst, templateWindowSize, searchWindowSize, &h[0])); + case NORM_L1: + switch (depth) { + case CV_8U: + fastNlMeansDenoisingMulti_(srcImgs, dst, + imgToDenoiseIndex, temporalWindowSize, + h, + templateWindowSize, searchWindowSize); + break; + case CV_16U: + fastNlMeansDenoisingMulti_(srcImgs, dst, + imgToDenoiseIndex, temporalWindowSize, + h, + templateWindowSize, searchWindowSize); + break; + default: + CV_Error(Error::StsBadArg, + "Unsupported depth! Only CV_8U and CV_16U are supported for NORM_L1"); + } break; default: CV_Error(Error::StsBadArg, - "Unsupported image format! Only CV_8U, CV_8UC2, CV_8UC3 and CV_8UC4 are supported"); + "Unsupported norm type! Only NORM_L2 and NORM_L1 are supported"); } }