Basic 16-bit implementation of fastNlMeansDenoising. Table-based exponentiation leads to high memory footprint and loss of precision in 16-bit mode.
This commit is contained in:
parent
49e93747b1
commit
42db9e7153
@ -65,17 +65,32 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h,
|
||||
switch (src.type()) {
|
||||
case CV_8U:
|
||||
parallel_for_(cv::Range(0, src.rows),
|
||||
FastNlMeansDenoisingInvoker<uchar, int, unsigned int>(
|
||||
FastNlMeansDenoisingInvoker<uchar, int, unsigned>(
|
||||
src, dst, templateWindowSize, searchWindowSize, h));
|
||||
break;
|
||||
case CV_8UC2:
|
||||
parallel_for_(cv::Range(0, src.rows),
|
||||
FastNlMeansDenoisingInvoker<cv::Vec2b, int, unsigned int>(
|
||||
FastNlMeansDenoisingInvoker<cv::Vec2b, int, unsigned>(
|
||||
src, dst, templateWindowSize, searchWindowSize, h));
|
||||
break;
|
||||
case CV_8UC3:
|
||||
parallel_for_(cv::Range(0, src.rows),
|
||||
FastNlMeansDenoisingInvoker<cv::Vec3b, int, unsigned int>(
|
||||
FastNlMeansDenoisingInvoker<cv::Vec3b, int, unsigned>(
|
||||
src, dst, templateWindowSize, searchWindowSize, h));
|
||||
break;
|
||||
case CV_16U:
|
||||
parallel_for_(cv::Range(0, src.rows),
|
||||
FastNlMeansDenoisingInvoker<unsigned short, int64, uint64>(
|
||||
src, dst, templateWindowSize, searchWindowSize, h));
|
||||
break;
|
||||
case CV_16UC2:
|
||||
parallel_for_(cv::Range(0, src.rows),
|
||||
FastNlMeansDenoisingInvoker<cv::Vec<ushort, 2>, int64, uint64>(
|
||||
src, dst, templateWindowSize, searchWindowSize, h));
|
||||
break;
|
||||
case CV_16UC3:
|
||||
parallel_for_(cv::Range(0, src.rows),
|
||||
FastNlMeansDenoisingInvoker<cv::Vec<ushort, 3>, int64, uint64>(
|
||||
src, dst, templateWindowSize, searchWindowSize, h));
|
||||
break;
|
||||
default:
|
||||
@ -181,13 +196,31 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds
|
||||
break;
|
||||
case CV_8UC2:
|
||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||
FastNlMeansMultiDenoisingInvoker<cv::Vec2b, int, unsigned int>(
|
||||
FastNlMeansMultiDenoisingInvoker<cv::Vec2b, int, unsigned>(
|
||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||
dst, templateWindowSize, searchWindowSize, h));
|
||||
break;
|
||||
case CV_8UC3:
|
||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||
FastNlMeansMultiDenoisingInvoker<cv::Vec3b, int, unsigned int>(
|
||||
FastNlMeansMultiDenoisingInvoker<cv::Vec3b, int, unsigned>(
|
||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||
dst, templateWindowSize, searchWindowSize, h));
|
||||
break;
|
||||
case CV_16U:
|
||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||
FastNlMeansMultiDenoisingInvoker<ushort, int64, uint64>(
|
||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||
dst, templateWindowSize, searchWindowSize, h));
|
||||
break;
|
||||
case CV_16UC2:
|
||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||
FastNlMeansMultiDenoisingInvoker<cv::Vec<ushort, 2>, int64, uint64>(
|
||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||
dst, templateWindowSize, searchWindowSize, h));
|
||||
break;
|
||||
case CV_16UC3:
|
||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||
FastNlMeansMultiDenoisingInvoker<cv::Vec<ushort, 3>, int64, uint64>(
|
||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||
dst, templateWindowSize, searchWindowSize, h));
|
||||
break;
|
||||
|
@ -123,11 +123,13 @@ FastNlMeansDenoisingInvoker<T, IT, UIT>::FastNlMeansDenoisingInvoker(
|
||||
|
||||
// precalc weight for every possible l2 dist between blocks
|
||||
// additional optimization of precalced weights to replace division(averaging) by binary shift
|
||||
// squared distances are truncated to 16 bits to get a reasonable table size
|
||||
// squared distances are truncated to 24 bits to avoid unreasonable table sizes
|
||||
// TODO: uses lots of memory and loses precision with 16-bit images ????
|
||||
const size_t TABLE_MAX_BITS = 24;
|
||||
CV_Assert(template_window_size_ <= 46340); // sqrt(INT_MAX)
|
||||
int template_window_size_sq = template_window_size_ * template_window_size_;
|
||||
almost_template_window_size_sq_bin_shift_ =
|
||||
getNearestPowerOf2(template_window_size_sq) + 2*pixelInfo<T>::sampleBits() - 16;
|
||||
almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq) +
|
||||
std::max(2*pixelInfo<T>::sampleBits(), TABLE_MAX_BITS) - TABLE_MAX_BITS;
|
||||
double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq;
|
||||
|
||||
IT max_dist =
|
||||
@ -139,7 +141,7 @@ FastNlMeansDenoisingInvoker<T, IT, UIT>::FastNlMeansDenoisingInvoker(
|
||||
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
|
||||
{
|
||||
double dist = almost_dist * almost_dist2actual_dist_multiplier;
|
||||
IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T))));
|
||||
IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * pixelInfo<T>::channels)));
|
||||
|
||||
if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
|
||||
weight = 0;
|
||||
@ -232,7 +234,7 @@ void FastNlMeansDenoisingInvoker<T, IT, UIT>::operator() (const Range& range) co
|
||||
|
||||
// calc weights
|
||||
IT estimation[3], weights_sum = 0;
|
||||
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
|
||||
for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
|
||||
estimation[channel_num] = 0;
|
||||
|
||||
for (int y = 0; y < search_window_size_; y++)
|
||||
@ -250,7 +252,7 @@ void FastNlMeansDenoisingInvoker<T, IT, UIT>::operator() (const Range& range) co
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
|
||||
for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
|
||||
estimation[channel_num] = (static_cast<UIT>(estimation[channel_num]) + weights_sum/2) / weights_sum;
|
||||
|
||||
dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
|
||||
|
@ -131,12 +131,15 @@ FastNlMeansMultiDenoisingInvoker<T, IT, UIT>::FastNlMeansMultiDenoisingInvoker(
|
||||
|
||||
// precalc weight for every possible l2 dist between blocks
|
||||
// additional optimization of precalced weights to replace division(averaging) by binary shift
|
||||
// squared distances are truncated to 16 bits to get a reasonable table size
|
||||
// squared distances are truncated to 24 bits to avoid unreasonable table sizes
|
||||
// TODO: uses lots of memory and loses precision with 16-bit images ????
|
||||
const size_t TABLE_MAX_BITS = 24;
|
||||
int template_window_size_sq = template_window_size_ * template_window_size_;
|
||||
almost_template_window_size_sq_bin_shift = 0;
|
||||
while (1 << almost_template_window_size_sq_bin_shift < template_window_size_sq)
|
||||
almost_template_window_size_sq_bin_shift++;
|
||||
almost_template_window_size_sq_bin_shift += 2*pixelInfo<T>::sampleBits() - 16;
|
||||
almost_template_window_size_sq_bin_shift +=
|
||||
std::max(2*pixelInfo<T>::sampleBits(), TABLE_MAX_BITS) - TABLE_MAX_BITS;
|
||||
|
||||
int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift;
|
||||
double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq;
|
||||
@ -150,7 +153,7 @@ FastNlMeansMultiDenoisingInvoker<T, IT, UIT>::FastNlMeansMultiDenoisingInvoker(
|
||||
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
|
||||
{
|
||||
double dist = almost_dist * almost_dist2actual_dist_multiplier;
|
||||
IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T))));
|
||||
IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * pixelInfo<T>::channels)));
|
||||
|
||||
if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
|
||||
weight = 0;
|
||||
@ -254,7 +257,7 @@ void FastNlMeansMultiDenoisingInvoker<T, IT, UIT>::operator() (const Range& rang
|
||||
IT weights_sum = 0;
|
||||
|
||||
IT estimation[3];
|
||||
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
|
||||
for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
|
||||
estimation[channel_num] = 0;
|
||||
|
||||
for (int d = 0; d < temporal_window_size_; d++)
|
||||
@ -279,8 +282,8 @@ void FastNlMeansMultiDenoisingInvoker<T, IT, UIT>::operator() (const Range& rang
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
|
||||
estimation[channel_num] = (static_cast<UIT>(estimation[channel_num]) + weights_sum / 2) / weights_sum; // ????
|
||||
for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
|
||||
estimation[channel_num] = (static_cast<UIT>(estimation[channel_num]) + weights_sum / 2) / weights_sum;
|
||||
|
||||
dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user