Basic 16-bit implementation of fastNlMeansDenoising. Table-based exponentiation leads to a high memory footprint and loss of precision in 16-bit mode.
This commit is contained in:
parent
49e93747b1
commit
42db9e7153
@ -65,17 +65,32 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h,
|
|||||||
switch (src.type()) {
|
switch (src.type()) {
|
||||||
case CV_8U:
|
case CV_8U:
|
||||||
parallel_for_(cv::Range(0, src.rows),
|
parallel_for_(cv::Range(0, src.rows),
|
||||||
FastNlMeansDenoisingInvoker<uchar, int, unsigned int>(
|
FastNlMeansDenoisingInvoker<uchar, int, unsigned>(
|
||||||
src, dst, templateWindowSize, searchWindowSize, h));
|
src, dst, templateWindowSize, searchWindowSize, h));
|
||||||
break;
|
break;
|
||||||
case CV_8UC2:
|
case CV_8UC2:
|
||||||
parallel_for_(cv::Range(0, src.rows),
|
parallel_for_(cv::Range(0, src.rows),
|
||||||
FastNlMeansDenoisingInvoker<cv::Vec2b, int, unsigned int>(
|
FastNlMeansDenoisingInvoker<cv::Vec2b, int, unsigned>(
|
||||||
src, dst, templateWindowSize, searchWindowSize, h));
|
src, dst, templateWindowSize, searchWindowSize, h));
|
||||||
break;
|
break;
|
||||||
case CV_8UC3:
|
case CV_8UC3:
|
||||||
parallel_for_(cv::Range(0, src.rows),
|
parallel_for_(cv::Range(0, src.rows),
|
||||||
FastNlMeansDenoisingInvoker<cv::Vec3b, int, unsigned int>(
|
FastNlMeansDenoisingInvoker<cv::Vec3b, int, unsigned>(
|
||||||
|
src, dst, templateWindowSize, searchWindowSize, h));
|
||||||
|
break;
|
||||||
|
case CV_16U:
|
||||||
|
parallel_for_(cv::Range(0, src.rows),
|
||||||
|
FastNlMeansDenoisingInvoker<unsigned short, int64, uint64>(
|
||||||
|
src, dst, templateWindowSize, searchWindowSize, h));
|
||||||
|
break;
|
||||||
|
case CV_16UC2:
|
||||||
|
parallel_for_(cv::Range(0, src.rows),
|
||||||
|
FastNlMeansDenoisingInvoker<cv::Vec<ushort, 2>, int64, uint64>(
|
||||||
|
src, dst, templateWindowSize, searchWindowSize, h));
|
||||||
|
break;
|
||||||
|
case CV_16UC3:
|
||||||
|
parallel_for_(cv::Range(0, src.rows),
|
||||||
|
FastNlMeansDenoisingInvoker<cv::Vec<ushort, 3>, int64, uint64>(
|
||||||
src, dst, templateWindowSize, searchWindowSize, h));
|
src, dst, templateWindowSize, searchWindowSize, h));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -181,13 +196,31 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds
|
|||||||
break;
|
break;
|
||||||
case CV_8UC2:
|
case CV_8UC2:
|
||||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||||
FastNlMeansMultiDenoisingInvoker<cv::Vec2b, int, unsigned int>(
|
FastNlMeansMultiDenoisingInvoker<cv::Vec2b, int, unsigned>(
|
||||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||||
dst, templateWindowSize, searchWindowSize, h));
|
dst, templateWindowSize, searchWindowSize, h));
|
||||||
break;
|
break;
|
||||||
case CV_8UC3:
|
case CV_8UC3:
|
||||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||||
FastNlMeansMultiDenoisingInvoker<cv::Vec3b, int, unsigned int>(
|
FastNlMeansMultiDenoisingInvoker<cv::Vec3b, int, unsigned>(
|
||||||
|
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||||
|
dst, templateWindowSize, searchWindowSize, h));
|
||||||
|
break;
|
||||||
|
case CV_16U:
|
||||||
|
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||||
|
FastNlMeansMultiDenoisingInvoker<ushort, int64, uint64>(
|
||||||
|
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||||
|
dst, templateWindowSize, searchWindowSize, h));
|
||||||
|
break;
|
||||||
|
case CV_16UC2:
|
||||||
|
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||||
|
FastNlMeansMultiDenoisingInvoker<cv::Vec<ushort, 2>, int64, uint64>(
|
||||||
|
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||||
|
dst, templateWindowSize, searchWindowSize, h));
|
||||||
|
break;
|
||||||
|
case CV_16UC3:
|
||||||
|
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||||
|
FastNlMeansMultiDenoisingInvoker<cv::Vec<ushort, 3>, int64, uint64>(
|
||||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||||
dst, templateWindowSize, searchWindowSize, h));
|
dst, templateWindowSize, searchWindowSize, h));
|
||||||
break;
|
break;
|
||||||
|
@ -123,11 +123,13 @@ FastNlMeansDenoisingInvoker<T, IT, UIT>::FastNlMeansDenoisingInvoker(
|
|||||||
|
|
||||||
// precalc weight for every possible l2 dist between blocks
|
// precalc weight for every possible l2 dist between blocks
|
||||||
// additional optimization of precalced weights to replace division(averaging) by binary shift
|
// additional optimization of precalced weights to replace division(averaging) by binary shift
|
||||||
// squared distances are truncated to 16 bits to get a reasonable table size
|
// squared distances are truncated to 24 bits to avoid unreasonable table sizes
|
||||||
|
// TODO: uses lots of memory and loses precision with 16-bit images ????
|
||||||
|
const size_t TABLE_MAX_BITS = 24;
|
||||||
CV_Assert(template_window_size_ <= 46340); // sqrt(INT_MAX)
|
CV_Assert(template_window_size_ <= 46340); // sqrt(INT_MAX)
|
||||||
int template_window_size_sq = template_window_size_ * template_window_size_;
|
int template_window_size_sq = template_window_size_ * template_window_size_;
|
||||||
almost_template_window_size_sq_bin_shift_ =
|
almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq) +
|
||||||
getNearestPowerOf2(template_window_size_sq) + 2*pixelInfo<T>::sampleBits() - 16;
|
std::max(2*pixelInfo<T>::sampleBits(), TABLE_MAX_BITS) - TABLE_MAX_BITS;
|
||||||
double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq;
|
double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq;
|
||||||
|
|
||||||
IT max_dist =
|
IT max_dist =
|
||||||
@ -139,7 +141,7 @@ FastNlMeansDenoisingInvoker<T, IT, UIT>::FastNlMeansDenoisingInvoker(
|
|||||||
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
|
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
|
||||||
{
|
{
|
||||||
double dist = almost_dist * almost_dist2actual_dist_multiplier;
|
double dist = almost_dist * almost_dist2actual_dist_multiplier;
|
||||||
IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T))));
|
IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * pixelInfo<T>::channels)));
|
||||||
|
|
||||||
if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
|
if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
|
||||||
weight = 0;
|
weight = 0;
|
||||||
@ -232,7 +234,7 @@ void FastNlMeansDenoisingInvoker<T, IT, UIT>::operator() (const Range& range) co
|
|||||||
|
|
||||||
// calc weights
|
// calc weights
|
||||||
IT estimation[3], weights_sum = 0;
|
IT estimation[3], weights_sum = 0;
|
||||||
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
|
for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
|
||||||
estimation[channel_num] = 0;
|
estimation[channel_num] = 0;
|
||||||
|
|
||||||
for (int y = 0; y < search_window_size_; y++)
|
for (int y = 0; y < search_window_size_; y++)
|
||||||
@ -250,7 +252,7 @@ void FastNlMeansDenoisingInvoker<T, IT, UIT>::operator() (const Range& range) co
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
|
for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
|
||||||
estimation[channel_num] = (static_cast<UIT>(estimation[channel_num]) + weights_sum/2) / weights_sum;
|
estimation[channel_num] = (static_cast<UIT>(estimation[channel_num]) + weights_sum/2) / weights_sum;
|
||||||
|
|
||||||
dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
|
dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
|
||||||
|
@ -131,12 +131,15 @@ FastNlMeansMultiDenoisingInvoker<T, IT, UIT>::FastNlMeansMultiDenoisingInvoker(
|
|||||||
|
|
||||||
// precalc weight for every possible l2 dist between blocks
|
// precalc weight for every possible l2 dist between blocks
|
||||||
// additional optimization of precalced weights to replace division(averaging) by binary shift
|
// additional optimization of precalced weights to replace division(averaging) by binary shift
|
||||||
// squared distances are truncated to 16 bits to get a reasonable table size
|
// squared distances are truncated to 24 bits to avoid unreasonable table sizes
|
||||||
|
// TODO: uses lots of memory and loses precision with 16-bit images ????
|
||||||
|
const size_t TABLE_MAX_BITS = 24;
|
||||||
int template_window_size_sq = template_window_size_ * template_window_size_;
|
int template_window_size_sq = template_window_size_ * template_window_size_;
|
||||||
almost_template_window_size_sq_bin_shift = 0;
|
almost_template_window_size_sq_bin_shift = 0;
|
||||||
while (1 << almost_template_window_size_sq_bin_shift < template_window_size_sq)
|
while (1 << almost_template_window_size_sq_bin_shift < template_window_size_sq)
|
||||||
almost_template_window_size_sq_bin_shift++;
|
almost_template_window_size_sq_bin_shift++;
|
||||||
almost_template_window_size_sq_bin_shift += 2*pixelInfo<T>::sampleBits() - 16;
|
almost_template_window_size_sq_bin_shift +=
|
||||||
|
std::max(2*pixelInfo<T>::sampleBits(), TABLE_MAX_BITS) - TABLE_MAX_BITS;
|
||||||
|
|
||||||
int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift;
|
int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift;
|
||||||
double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq;
|
double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq;
|
||||||
@ -150,7 +153,7 @@ FastNlMeansMultiDenoisingInvoker<T, IT, UIT>::FastNlMeansMultiDenoisingInvoker(
|
|||||||
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
|
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
|
||||||
{
|
{
|
||||||
double dist = almost_dist * almost_dist2actual_dist_multiplier;
|
double dist = almost_dist * almost_dist2actual_dist_multiplier;
|
||||||
IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T))));
|
IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * pixelInfo<T>::channels)));
|
||||||
|
|
||||||
if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
|
if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
|
||||||
weight = 0;
|
weight = 0;
|
||||||
@ -254,7 +257,7 @@ void FastNlMeansMultiDenoisingInvoker<T, IT, UIT>::operator() (const Range& rang
|
|||||||
IT weights_sum = 0;
|
IT weights_sum = 0;
|
||||||
|
|
||||||
IT estimation[3];
|
IT estimation[3];
|
||||||
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
|
for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
|
||||||
estimation[channel_num] = 0;
|
estimation[channel_num] = 0;
|
||||||
|
|
||||||
for (int d = 0; d < temporal_window_size_; d++)
|
for (int d = 0; d < temporal_window_size_; d++)
|
||||||
@ -279,8 +282,8 @@ void FastNlMeansMultiDenoisingInvoker<T, IT, UIT>::operator() (const Range& rang
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
|
for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
|
||||||
estimation[channel_num] = (static_cast<UIT>(estimation[channel_num]) + weights_sum / 2) / weights_sum; // ????
|
estimation[channel_num] = (static_cast<UIT>(estimation[channel_num]) + weights_sum / 2) / weights_sum;
|
||||||
|
|
||||||
dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
|
dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user