From 42db9e7153a6d10b429df0bc2108278251c11ebc Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Thu, 12 Feb 2015 22:14:01 +0100 Subject: [PATCH] Basic 16-bit implmentation of fastNlMeansDenoising. Table-based exponetiation leads to high memory footprint and loss of precision in 16-bit mode. --- modules/photo/src/denoising.cpp | 43 ++++++++++++++++--- .../src/fast_nlmeans_denoising_invoker.hpp | 14 +++--- .../fast_nlmeans_multi_denoising_invoker.hpp | 15 ++++--- 3 files changed, 55 insertions(+), 17 deletions(-) diff --git a/modules/photo/src/denoising.cpp b/modules/photo/src/denoising.cpp index 724ea0eb0..0abeefe5b 100644 --- a/modules/photo/src/denoising.cpp +++ b/modules/photo/src/denoising.cpp @@ -65,17 +65,32 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h, switch (src.type()) { case CV_8U: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( + FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC2: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( + FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC3: parallel_for_(cv::Range(0, src.rows), - FastNlMeansDenoisingInvoker( + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_16U: + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker( + src, dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_16UC2: + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker, int64, uint64>( + src, dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_16UC3: + parallel_for_(cv::Range(0, src.rows), + FastNlMeansDenoisingInvoker, int64, uint64>( src, dst, templateWindowSize, searchWindowSize, h)); break; default: @@ -181,13 +196,31 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds break; case CV_8UC2: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC3: parallel_for_(cv::Range(0, srcImgs[0].rows), - FastNlMeansMultiDenoisingInvoker( + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_16U: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_16UC2: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, int64, uint64>( + srcImgs, imgToDenoiseIndex, temporalWindowSize, + dst, templateWindowSize, searchWindowSize, h)); + break; + case CV_16UC3: + parallel_for_(cv::Range(0, srcImgs[0].rows), + FastNlMeansMultiDenoisingInvoker, int64, uint64>( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, h)); break; diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp index 202e36013..27a016ae9 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp @@ -123,11 +123,13 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( // precalc weight for every possible l2 dist between blocks // additional optimization of precalced weights to replace division(averaging) by binary shift - // squared distances are truncated to 16 bits to get a reasonable table size + // squared distances are truncated to 24 bits to avoid unreasonable table sizes + // TODO: uses lots of memory and loses precision wtih 16-bit images ???? + const size_t TABLE_MAX_BITS = 24; CV_Assert(template_window_size_ <= 46340); // sqrt(INT_MAX) int template_window_size_sq = template_window_size_ * template_window_size_; - almost_template_window_size_sq_bin_shift_ = - getNearestPowerOf2(template_window_size_sq) + 2*pixelInfo::sampleBits() - 16; + almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq) + + std::max(2*pixelInfo::sampleBits(), TABLE_MAX_BITS) - TABLE_MAX_BITS; double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq; IT max_dist = @@ -139,7 +141,7 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) { double dist = almost_dist * almost_dist2actual_dist_multiplier; - IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T)))); + IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * pixelInfo::channels))); if (weight < WEIGHT_THRESHOLD * fixed_point_mult_) weight = 0; @@ -232,7 +234,7 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) co // calc weights IT estimation[3], weights_sum = 0; - for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++) + for (size_t channel_num = 0; channel_num < pixelInfo::channels; channel_num++) estimation[channel_num] = 0; for (int y = 0; y < search_window_size_; y++) @@ -250,7 +252,7 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) co } } - for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++) + for (size_t channel_num = 0; channel_num < pixelInfo::channels; channel_num++) estimation[channel_num] = (static_cast(estimation[channel_num]) + weights_sum/2) / weights_sum; dst_.at(i,j) = saturateCastFromArray(estimation); diff --git a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp index 48276b426..c90249b82 100644 --- a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp @@ -131,12 +131,15 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( // precalc weight for every possible l2 dist between blocks // additional optimization of precalced weights to replace division(averaging) by binary shift - // squared distances are truncated to 16 bits to get a reasonable table size + // squared distances are truncated to 24 bits to avoid unreasonable table sizes + // TODO: uses lots of memory and loses precision wtih 16-bit images ???? + const size_t TABLE_MAX_BITS = 24; int template_window_size_sq = template_window_size_ * template_window_size_; almost_template_window_size_sq_bin_shift = 0; while (1 << almost_template_window_size_sq_bin_shift < template_window_size_sq) almost_template_window_size_sq_bin_shift++; - almost_template_window_size_sq_bin_shift += 2*pixelInfo::sampleBits() - 16; + almost_template_window_size_sq_bin_shift += + std::max(2*pixelInfo::sampleBits(), TABLE_MAX_BITS) - TABLE_MAX_BITS; int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift; double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq; @@ -150,7 +153,7 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) { double dist = almost_dist * almost_dist2actual_dist_multiplier; - IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T)))); + IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * pixelInfo::channels))); if (weight < WEIGHT_THRESHOLD * fixed_point_mult_) weight = 0; @@ -254,7 +257,7 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& rang IT weights_sum = 0; IT estimation[3]; - for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++) + for (size_t channel_num = 0; channel_num < pixelInfo::channels; channel_num++) estimation[channel_num] = 0; for (int d = 0; d < temporal_window_size_; d++) @@ -279,8 +282,8 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& rang } } - for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++) - estimation[channel_num] = (static_cast(estimation[channel_num]) + weights_sum / 2) / weights_sum; // ???? + for (size_t channel_num = 0; channel_num < pixelInfo::channels; channel_num++) + estimation[channel_num] = (static_cast(estimation[channel_num]) + weights_sum / 2) / weights_sum; dst_.at(i,j) = saturateCastFromArray(estimation);