Merge pull request #3814 from erikrk:denoising-16bit-master
This commit is contained in:
commit
5501cfd809
@ -442,6 +442,10 @@ template<typename _Tp> static inline _Tp saturate_cast(int v) { return _Tp(
|
|||||||
template<typename _Tp> static inline _Tp saturate_cast(float v) { return _Tp(v); }
|
template<typename _Tp> static inline _Tp saturate_cast(float v) { return _Tp(v); }
|
||||||
/** @overload */
|
/** @overload */
|
||||||
template<typename _Tp> static inline _Tp saturate_cast(double v) { return _Tp(v); }
|
template<typename _Tp> static inline _Tp saturate_cast(double v) { return _Tp(v); }
|
||||||
|
/** @overload */
|
||||||
|
template<typename _Tp> static inline _Tp saturate_cast(int64 v) { return _Tp(v); }
|
||||||
|
/** @overload */
|
||||||
|
template<typename _Tp> static inline _Tp saturate_cast(uint64 v) { return _Tp(v); }
|
||||||
|
|
||||||
//! @cond IGNORED
|
//! @cond IGNORED
|
||||||
|
|
||||||
@ -452,6 +456,8 @@ template<> inline uchar saturate_cast<uchar>(short v) { return saturate_c
|
|||||||
template<> inline uchar saturate_cast<uchar>(unsigned v) { return (uchar)std::min(v, (unsigned)UCHAR_MAX); }
|
template<> inline uchar saturate_cast<uchar>(unsigned v) { return (uchar)std::min(v, (unsigned)UCHAR_MAX); }
|
||||||
template<> inline uchar saturate_cast<uchar>(float v) { int iv = cvRound(v); return saturate_cast<uchar>(iv); }
|
template<> inline uchar saturate_cast<uchar>(float v) { int iv = cvRound(v); return saturate_cast<uchar>(iv); }
|
||||||
template<> inline uchar saturate_cast<uchar>(double v) { int iv = cvRound(v); return saturate_cast<uchar>(iv); }
|
template<> inline uchar saturate_cast<uchar>(double v) { int iv = cvRound(v); return saturate_cast<uchar>(iv); }
|
||||||
|
// 64-bit sources: clamp into [0, UCHAR_MAX] before narrowing.
template<> inline uchar saturate_cast<uchar>(int64 v)        { return (uchar)(v < 0 ? 0 : v > UCHAR_MAX ? UCHAR_MAX : v); }
template<> inline uchar saturate_cast<uchar>(uint64 v)       { return (uchar)(v < (uint64)UCHAR_MAX ? v : (uint64)UCHAR_MAX); }
|
||||||
|
|
||||||
template<> inline schar saturate_cast<schar>(uchar v) { return (schar)std::min((int)v, SCHAR_MAX); }
|
template<> inline schar saturate_cast<schar>(uchar v) { return (schar)std::min((int)v, SCHAR_MAX); }
|
||||||
template<> inline schar saturate_cast<schar>(ushort v) { return (schar)std::min((unsigned)v, (unsigned)SCHAR_MAX); }
|
template<> inline schar saturate_cast<schar>(ushort v) { return (schar)std::min((unsigned)v, (unsigned)SCHAR_MAX); }
|
||||||
@ -460,6 +466,8 @@ template<> inline schar saturate_cast<schar>(short v) { return saturate_c
|
|||||||
template<> inline schar saturate_cast<schar>(unsigned v) { return (schar)std::min(v, (unsigned)SCHAR_MAX); }
|
template<> inline schar saturate_cast<schar>(unsigned v) { return (schar)std::min(v, (unsigned)SCHAR_MAX); }
|
||||||
template<> inline schar saturate_cast<schar>(float v) { int iv = cvRound(v); return saturate_cast<schar>(iv); }
|
template<> inline schar saturate_cast<schar>(float v) { int iv = cvRound(v); return saturate_cast<schar>(iv); }
|
||||||
template<> inline schar saturate_cast<schar>(double v) { int iv = cvRound(v); return saturate_cast<schar>(iv); }
|
template<> inline schar saturate_cast<schar>(double v) { int iv = cvRound(v); return saturate_cast<schar>(iv); }
|
||||||
|
// 64-bit sources: clamp into [SCHAR_MIN, SCHAR_MAX] with plain comparisons.
// Shifting the range first (v - SCHAR_MIN) would overflow signed int64 for inputs
// close to the int64 maximum, which is undefined behaviour.
template<> inline schar saturate_cast<schar>(int64 v)        { return (schar)(v < SCHAR_MIN ? SCHAR_MIN : v > SCHAR_MAX ? SCHAR_MAX : v); }
template<> inline schar saturate_cast<schar>(uint64 v)       { return (schar)std::min(v, (uint64)SCHAR_MAX); }
|
||||||
|
|
||||||
template<> inline ushort saturate_cast<ushort>(schar v) { return (ushort)std::max((int)v, 0); }
|
template<> inline ushort saturate_cast<ushort>(schar v) { return (ushort)std::max((int)v, 0); }
|
||||||
template<> inline ushort saturate_cast<ushort>(short v) { return (ushort)std::max((int)v, 0); }
|
template<> inline ushort saturate_cast<ushort>(short v) { return (ushort)std::max((int)v, 0); }
|
||||||
@ -467,12 +475,16 @@ template<> inline ushort saturate_cast<ushort>(int v) { return (ushort)((
|
|||||||
template<> inline ushort saturate_cast<ushort>(unsigned v) { return (ushort)std::min(v, (unsigned)USHRT_MAX); }
|
template<> inline ushort saturate_cast<ushort>(unsigned v) { return (ushort)std::min(v, (unsigned)USHRT_MAX); }
|
||||||
template<> inline ushort saturate_cast<ushort>(float v) { int iv = cvRound(v); return saturate_cast<ushort>(iv); }
|
template<> inline ushort saturate_cast<ushort>(float v) { int iv = cvRound(v); return saturate_cast<ushort>(iv); }
|
||||||
template<> inline ushort saturate_cast<ushort>(double v) { int iv = cvRound(v); return saturate_cast<ushort>(iv); }
|
template<> inline ushort saturate_cast<ushort>(double v) { int iv = cvRound(v); return saturate_cast<ushort>(iv); }
|
||||||
|
// 64-bit sources: clamp into [0, USHRT_MAX] before narrowing.
template<> inline ushort saturate_cast<ushort>(int64 v)      { return (ushort)(v < 0 ? 0 : v > USHRT_MAX ? USHRT_MAX : v); }
template<> inline ushort saturate_cast<ushort>(uint64 v)     { return (ushort)(v < (uint64)USHRT_MAX ? v : (uint64)USHRT_MAX); }
|
||||||
|
|
||||||
template<> inline short saturate_cast<short>(ushort v) { return (short)std::min((int)v, SHRT_MAX); }
|
template<> inline short saturate_cast<short>(ushort v) { return (short)std::min((int)v, SHRT_MAX); }
|
||||||
template<> inline short saturate_cast<short>(int v) { return (short)((unsigned)(v - SHRT_MIN) <= (unsigned)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); }
|
template<> inline short saturate_cast<short>(int v) { return (short)((unsigned)(v - SHRT_MIN) <= (unsigned)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); }
|
||||||
template<> inline short saturate_cast<short>(unsigned v) { return (short)std::min(v, (unsigned)SHRT_MAX); }
|
template<> inline short saturate_cast<short>(unsigned v) { return (short)std::min(v, (unsigned)SHRT_MAX); }
|
||||||
template<> inline short saturate_cast<short>(float v) { int iv = cvRound(v); return saturate_cast<short>(iv); }
|
template<> inline short saturate_cast<short>(float v) { int iv = cvRound(v); return saturate_cast<short>(iv); }
|
||||||
template<> inline short saturate_cast<short>(double v) { int iv = cvRound(v); return saturate_cast<short>(iv); }
|
template<> inline short saturate_cast<short>(double v) { int iv = cvRound(v); return saturate_cast<short>(iv); }
|
||||||
|
// 64-bit sources: clamp into [SHRT_MIN, SHRT_MAX] with plain comparisons.
// Shifting the range first (v - SHRT_MIN) would overflow signed int64 for inputs
// close to the int64 maximum, which is undefined behaviour.
template<> inline short saturate_cast<short>(int64 v)        { return (short)(v < SHRT_MIN ? SHRT_MIN : v > SHRT_MAX ? SHRT_MAX : v); }
template<> inline short saturate_cast<short>(uint64 v)       { return (short)std::min(v, (uint64)SHRT_MAX); }
|
||||||
|
|
||||||
template<> inline int saturate_cast<int>(float v) { return cvRound(v); }
|
template<> inline int saturate_cast<int>(float v) { return cvRound(v); }
|
||||||
template<> inline int saturate_cast<int>(double v) { return cvRound(v); }
|
template<> inline int saturate_cast<int>(double v) { return cvRound(v); }
|
||||||
|
@ -119,7 +119,7 @@ CV_EXPORTS_W void inpaint( InputArray src, InputArray inpaintMask,
|
|||||||
<http://www.ipol.im/pub/algo/bcm_non_local_means_denoising/> with several computational
|
<http://www.ipol.im/pub/algo/bcm_non_local_means_denoising/> with several computational
|
||||||
optimizations. Noise expected to be a gaussian white noise
|
optimizations. Noise expected to be a gaussian white noise
|
||||||
|
|
||||||
@param src Input 8-bit 1-channel, 2-channel or 3-channel image.
|
@param src Input 8-bit 1-channel, 2-channel, 3-channel or 4-channel image.
|
||||||
@param dst Output image with the same size and type as src .
|
@param dst Output image with the same size and type as src .
|
||||||
@param templateWindowSize Size in pixels of the template patch that is used to compute weights.
|
@param templateWindowSize Size in pixels of the template patch that is used to compute weights.
|
||||||
Should be odd. Recommended value 7 pixels
|
Should be odd. Recommended value 7 pixels
|
||||||
@ -138,6 +138,35 @@ parameter.
|
|||||||
CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst, float h = 3,
|
CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst, float h = 3,
|
||||||
int templateWindowSize = 7, int searchWindowSize = 21);
|
int templateWindowSize = 7, int searchWindowSize = 21);
|
||||||
|
|
||||||
|
/** @brief Perform image denoising using Non-local Means Denoising algorithm
|
||||||
|
<http://www.ipol.im/pub/algo/bcm_non_local_means_denoising/> with several computational
|
||||||
|
optimizations. Noise expected to be a gaussian white noise
|
||||||
|
|
||||||
|
@param src Input 8-bit or 16-bit (only with NORM_L1) 1-channel,
|
||||||
|
2-channel, 3-channel or 4-channel image.
|
||||||
|
@param dst Output image with the same size and type as src .
|
||||||
|
@param templateWindowSize Size in pixels of the template patch that is used to compute weights.
|
||||||
|
Should be odd. Recommended value 7 pixels
|
||||||
|
@param searchWindowSize Size in pixels of the window that is used to compute weighted average for
|
||||||
|
given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater
|
||||||
|
denoising time. Recommended value 21 pixels
|
||||||
|
@param h Array of parameters regulating filter strength, either one
|
||||||
|
parameter applied to all channels or one per channel in dst. Big h value
|
||||||
|
perfectly removes noise but also removes image details, smaller h
|
||||||
|
value preserves details but also preserves some noise
|
||||||
|
@param normType Type of norm used for weight calculation. Can be either NORM_L2 or NORM_L1
|
||||||
|
|
||||||
|
This function is expected to be applied to grayscale images. For colored images look at
|
||||||
|
fastNlMeansDenoisingColored. Advanced usage of this function can be manual denoising of colored
|
||||||
|
image in different colorspaces. Such approach is used in fastNlMeansDenoisingColored by converting
|
||||||
|
image to CIELAB colorspace and then separately denoise L and AB components with different h
|
||||||
|
parameter.
|
||||||
|
*/
|
||||||
|
CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst,
|
||||||
|
const std::vector<float>& h,
|
||||||
|
int templateWindowSize = 7, int searchWindowSize = 21,
|
||||||
|
int normType = NORM_L2);
|
||||||
|
|
||||||
/** @brief Modification of fastNlMeansDenoising function for colored images
|
/** @brief Modification of fastNlMeansDenoising function for colored images
|
||||||
|
|
||||||
@param src Input 8-bit 3-channel image.
|
@param src Input 8-bit 3-channel image.
|
||||||
@ -165,7 +194,35 @@ captured in small period of time. For example video. This version of the functio
|
|||||||
images or for manual manipulation with colorspaces. For more details see
|
images or for manual manipulation with colorspaces. For more details see
|
||||||
<http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.131.6394>
|
<http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.131.6394>
|
||||||
|
|
||||||
@param srcImgs Input 8-bit 1-channel, 2-channel or 3-channel images sequence. All images should
|
@param srcImgs Input 8-bit 1-channel, 2-channel, 3-channel or
|
||||||
|
4-channel images sequence. All images should have the same type and
|
||||||
|
size.
|
||||||
|
@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence
|
||||||
|
@param temporalWindowSize Number of surrounding images to use for target image denoising. Should
|
||||||
|
be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to
|
||||||
|
imgToDenoiseIndex + temporalWindowSize / 2 from srcImgs will be used to denoise
|
||||||
|
srcImgs[imgToDenoiseIndex] image.
|
||||||
|
@param dst Output image with the same size and type as srcImgs images.
|
||||||
|
@param templateWindowSize Size in pixels of the template patch that is used to compute weights.
|
||||||
|
Should be odd. Recommended value 7 pixels
|
||||||
|
@param searchWindowSize Size in pixels of the window that is used to compute weighted average for
|
||||||
|
given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater
|
||||||
|
denoising time. Recommended value 21 pixels
|
||||||
|
@param h Parameter regulating filter strength. Bigger h value
|
||||||
|
perfectly removes noise but also removes image details, smaller h
|
||||||
|
value preserves details but also preserves some noise
|
||||||
|
*/
|
||||||
|
CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst,
|
||||||
|
int imgToDenoiseIndex, int temporalWindowSize,
|
||||||
|
float h = 3, int templateWindowSize = 7, int searchWindowSize = 21);
|
||||||
|
|
||||||
|
/** @brief Modification of fastNlMeansDenoising function for images sequence where consecutive images have been
|
||||||
|
captured in small period of time. For example video. This version of the function is for grayscale
|
||||||
|
images or for manual manipulation with colorspaces. For more details see
|
||||||
|
<http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.131.6394>
|
||||||
|
|
||||||
|
@param srcImgs Input 8-bit or 16-bit (only with NORM_L1) 1-channel,
|
||||||
|
2-channel, 3-channel or 4-channel images sequence. All images should
|
||||||
have the same type and size.
|
have the same type and size.
|
||||||
@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence
|
@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence
|
||||||
@param temporalWindowSize Number of surrounding images to use for target image denoising. Should
|
@param temporalWindowSize Number of surrounding images to use for target image denoising. Should
|
||||||
@ -178,13 +235,17 @@ Should be odd. Recommended value 7 pixels
|
|||||||
@param searchWindowSize Size in pixels of the window that is used to compute weighted average for
|
@param searchWindowSize Size in pixels of the window that is used to compute weighted average for
|
||||||
given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater
|
given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater
|
||||||
denoising time. Recommended value 21 pixels
|
denoising time. Recommended value 21 pixels
|
||||||
@param h Parameter regulating filter strength for luminance component. Bigger h value perfectly
|
@param h Array of parameters regulating filter strength, either one
|
||||||
removes noise but also removes image details, smaller h value preserves details but also preserves
|
parameter applied to all channels or one per channel in dst. Big h value
|
||||||
some noise
|
perfectly removes noise but also removes image details, smaller h
|
||||||
|
value preserves details but also preserves some noise
|
||||||
|
@param normType Type of norm used for weight calculation. Can be either NORM_L2 or NORM_L1
|
||||||
*/
|
*/
|
||||||
CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst,
|
CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst,
|
||||||
int imgToDenoiseIndex, int temporalWindowSize,
|
int imgToDenoiseIndex, int temporalWindowSize,
|
||||||
float h = 3, int templateWindowSize = 7, int searchWindowSize = 21);
|
const std::vector<float>& h,
|
||||||
|
int templateWindowSize = 7, int searchWindowSize = 21,
|
||||||
|
int normType = NORM_L2);
|
||||||
|
|
||||||
/** @brief Modification of fastNlMeansDenoisingMulti function for colored images sequences
|
/** @brief Modification of fastNlMeansDenoisingMulti function for colored images sequences
|
||||||
|
|
||||||
|
@ -45,42 +45,115 @@
|
|||||||
#include "fast_nlmeans_multi_denoising_invoker.hpp"
|
#include "fast_nlmeans_multi_denoising_invoker.hpp"
|
||||||
#include "fast_nlmeans_denoising_opencl.hpp"
|
#include "fast_nlmeans_denoising_opencl.hpp"
|
||||||
|
|
||||||
|
template<typename ST, typename IT, typename UIT, typename D>
|
||||||
|
static void fastNlMeansDenoising_( const Mat& src, Mat& dst, const std::vector<float>& h,
|
||||||
|
int templateWindowSize, int searchWindowSize)
|
||||||
|
{
|
||||||
|
int hn = (int)h.size();
|
||||||
|
|
||||||
|
switch (CV_MAT_CN(src.type())) {
|
||||||
|
case 1:
|
||||||
|
parallel_for_(cv::Range(0, src.rows),
|
||||||
|
FastNlMeansDenoisingInvoker<ST, IT, UIT, D, int>(
|
||||||
|
src, dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
if (hn == 1)
|
||||||
|
parallel_for_(cv::Range(0, src.rows),
|
||||||
|
FastNlMeansDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, int>(
|
||||||
|
src, dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||||
|
else
|
||||||
|
parallel_for_(cv::Range(0, src.rows),
|
||||||
|
FastNlMeansDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, Vec2i>(
|
||||||
|
src, dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
if (hn == 1)
|
||||||
|
parallel_for_(cv::Range(0, src.rows),
|
||||||
|
FastNlMeansDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, int>(
|
||||||
|
src, dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||||
|
else
|
||||||
|
parallel_for_(cv::Range(0, src.rows),
|
||||||
|
FastNlMeansDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, Vec3i>(
|
||||||
|
src, dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
if (hn == 1)
|
||||||
|
parallel_for_(cv::Range(0, src.rows),
|
||||||
|
FastNlMeansDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, int>(
|
||||||
|
src, dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||||
|
else
|
||||||
|
parallel_for_(cv::Range(0, src.rows),
|
||||||
|
FastNlMeansDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, Vec4i>(
|
||||||
|
src, dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
CV_Error(Error::StsBadArg,
|
||||||
|
"Unsupported number of channels! Only 1, 2, 3, and 4 are supported");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h,
|
void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h,
|
||||||
int templateWindowSize, int searchWindowSize)
|
int templateWindowSize, int searchWindowSize)
|
||||||
{
|
{
|
||||||
|
fastNlMeansDenoising(_src, _dst, std::vector<float>(1, h),
|
||||||
|
templateWindowSize, searchWindowSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, const std::vector<float>& h,
|
||||||
|
int templateWindowSize, int searchWindowSize, int normType)
|
||||||
|
{
|
||||||
|
int hn = (int)h.size(), type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||||
|
CV_Assert(hn == 1 || hn == cn);
|
||||||
|
|
||||||
Size src_size = _src.size();
|
Size src_size = _src.size();
|
||||||
CV_OCL_RUN(_src.dims() <= 2 && (_src.isUMat() || _dst.isUMat()) &&
|
CV_OCL_RUN(_src.dims() <= 2 && (_src.isUMat() || _dst.isUMat()) &&
|
||||||
src_size.width > 5 && src_size.height > 5, // low accuracy on small sizes
|
src_size.width > 5 && src_size.height > 5, // low accuracy on small sizes
|
||||||
ocl_fastNlMeansDenoising(_src, _dst, h, templateWindowSize, searchWindowSize))
|
ocl_fastNlMeansDenoising(_src, _dst, &h[0], hn,
|
||||||
|
templateWindowSize, searchWindowSize, normType))
|
||||||
|
|
||||||
Mat src = _src.getMat();
|
Mat src = _src.getMat();
|
||||||
_dst.create(src_size, src.type());
|
_dst.create(src_size, src.type());
|
||||||
Mat dst = _dst.getMat();
|
Mat dst = _dst.getMat();
|
||||||
|
|
||||||
|
switch (normType) {
|
||||||
|
case NORM_L2:
|
||||||
#ifdef HAVE_TEGRA_OPTIMIZATION
|
#ifdef HAVE_TEGRA_OPTIMIZATION
|
||||||
if(tegra::useTegra() && tegra::fastNlMeansDenoising(src, dst, h, templateWindowSize, searchWindowSize))
|
if(hn == 1 && tegra::useTegra() &&
|
||||||
return;
|
tegra::fastNlMeansDenoising(src, dst, h[0], templateWindowSize, searchWindowSize))
|
||||||
|
return;
|
||||||
#endif
|
#endif
|
||||||
|
switch (depth) {
|
||||||
switch (src.type()) {
|
case CV_8U:
|
||||||
case CV_8U:
|
fastNlMeansDenoising_<uchar, int, unsigned, DistSquared>(src, dst, h,
|
||||||
parallel_for_(cv::Range(0, src.rows),
|
templateWindowSize,
|
||||||
FastNlMeansDenoisingInvoker<uchar>(
|
searchWindowSize);
|
||||||
src, dst, templateWindowSize, searchWindowSize, h));
|
break;
|
||||||
|
default:
|
||||||
|
CV_Error(Error::StsBadArg,
|
||||||
|
"Unsupported depth! Only CV_8U is supported for NORM_L2");
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case CV_8UC2:
|
case NORM_L1:
|
||||||
parallel_for_(cv::Range(0, src.rows),
|
switch (depth) {
|
||||||
FastNlMeansDenoisingInvoker<cv::Vec2b>(
|
case CV_8U:
|
||||||
src, dst, templateWindowSize, searchWindowSize, h));
|
fastNlMeansDenoising_<uchar, int, unsigned, DistAbs>(src, dst, h,
|
||||||
break;
|
templateWindowSize,
|
||||||
case CV_8UC3:
|
searchWindowSize);
|
||||||
parallel_for_(cv::Range(0, src.rows),
|
break;
|
||||||
FastNlMeansDenoisingInvoker<cv::Vec3b>(
|
case CV_16U:
|
||||||
src, dst, templateWindowSize, searchWindowSize, h));
|
fastNlMeansDenoising_<ushort, int64, uint64, DistAbs>(src, dst, h,
|
||||||
|
templateWindowSize,
|
||||||
|
searchWindowSize);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
CV_Error(Error::StsBadArg,
|
||||||
|
"Unsupported depth! Only CV_8U and CV_16U are supported for NORM_L1");
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
CV_Error(Error::StsBadArg,
|
CV_Error(Error::StsBadArg,
|
||||||
"Unsupported image format! Only CV_8UC1, CV_8UC2 and CV_8UC3 are supported");
|
"Unsupported norm type! Only NORM_L2 and NORM_L1 are supported");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -92,7 +165,7 @@ void cv::fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
|
|||||||
Size src_size = _src.size();
|
Size src_size = _src.size();
|
||||||
if (type != CV_8UC3 && type != CV_8UC4)
|
if (type != CV_8UC3 && type != CV_8UC4)
|
||||||
{
|
{
|
||||||
CV_Error(Error::StsBadArg, "Type of input image should be CV_8UC3!");
|
CV_Error(Error::StsBadArg, "Type of input image should be CV_8UC3 or CV_8UC4!");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -108,8 +181,8 @@ void cv::fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
|
|||||||
Mat src_lab;
|
Mat src_lab;
|
||||||
cvtColor(src, src_lab, COLOR_LBGR2Lab);
|
cvtColor(src, src_lab, COLOR_LBGR2Lab);
|
||||||
|
|
||||||
Mat l(src_size, CV_8U);
|
Mat l(src_size, CV_MAKE_TYPE(depth, 1));
|
||||||
Mat ab(src_size, CV_8UC2);
|
Mat ab(src_size, CV_MAKE_TYPE(depth, 2));
|
||||||
Mat l_ab[] = { l, ab };
|
Mat l_ab[] = { l, ab };
|
||||||
int from_to[] = { 0,0, 1,1, 2,2 };
|
int from_to[] = { 0,0, 1,1, 2,2 };
|
||||||
mixChannels(&src_lab, 1, l_ab, 2, from_to, 3);
|
mixChannels(&src_lab, 1, l_ab, 2, from_to, 3);
|
||||||
@ -157,9 +230,76 @@ static void fastNlMeansDenoisingMultiCheckPreconditions(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename ST, typename IT, typename UIT, typename D>
|
||||||
|
static void fastNlMeansDenoisingMulti_( const std::vector<Mat>& srcImgs, Mat& dst,
|
||||||
|
int imgToDenoiseIndex, int temporalWindowSize,
|
||||||
|
const std::vector<float>& h,
|
||||||
|
int templateWindowSize, int searchWindowSize)
|
||||||
|
{
|
||||||
|
int hn = (int)h.size();
|
||||||
|
|
||||||
|
switch (srcImgs[0].type())
|
||||||
|
{
|
||||||
|
case CV_8U:
|
||||||
|
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||||
|
FastNlMeansMultiDenoisingInvoker<uchar, IT, UIT, D, int>(
|
||||||
|
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||||
|
dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||||
|
break;
|
||||||
|
case CV_8UC2:
|
||||||
|
if (hn == 1)
|
||||||
|
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||||
|
FastNlMeansMultiDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, int>(
|
||||||
|
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||||
|
dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||||
|
else
|
||||||
|
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||||
|
FastNlMeansMultiDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, Vec2i>(
|
||||||
|
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||||
|
dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||||
|
break;
|
||||||
|
case CV_8UC3:
|
||||||
|
if (hn == 1)
|
||||||
|
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||||
|
FastNlMeansMultiDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, int>(
|
||||||
|
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||||
|
dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||||
|
else
|
||||||
|
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||||
|
FastNlMeansMultiDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, Vec3i>(
|
||||||
|
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||||
|
dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||||
|
break;
|
||||||
|
case CV_8UC4:
|
||||||
|
if (hn == 1)
|
||||||
|
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||||
|
FastNlMeansMultiDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, int>(
|
||||||
|
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||||
|
dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||||
|
else
|
||||||
|
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||||
|
FastNlMeansMultiDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, Vec4i>(
|
||||||
|
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||||
|
dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
CV_Error(Error::StsBadArg,
|
||||||
|
"Unsupported image format! Only CV_8U, CV_8UC2, CV_8UC3 and CV_8UC4 are supported");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _dst,
|
void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _dst,
|
||||||
int imgToDenoiseIndex, int temporalWindowSize,
|
int imgToDenoiseIndex, int temporalWindowSize,
|
||||||
float h, int templateWindowSize, int searchWindowSize)
|
float h, int templateWindowSize, int searchWindowSize)
|
||||||
|
{
|
||||||
|
fastNlMeansDenoisingMulti(_srcImgs, _dst, imgToDenoiseIndex, temporalWindowSize,
|
||||||
|
std::vector<float>(1, h), templateWindowSize, searchWindowSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _dst,
|
||||||
|
int imgToDenoiseIndex, int temporalWindowSize,
|
||||||
|
const std::vector<float>& h,
|
||||||
|
int templateWindowSize, int searchWindowSize, int normType)
|
||||||
{
|
{
|
||||||
std::vector<Mat> srcImgs;
|
std::vector<Mat> srcImgs;
|
||||||
_srcImgs.getMatVector(srcImgs);
|
_srcImgs.getMatVector(srcImgs);
|
||||||
@ -168,32 +308,52 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds
|
|||||||
srcImgs, imgToDenoiseIndex,
|
srcImgs, imgToDenoiseIndex,
|
||||||
temporalWindowSize, templateWindowSize, searchWindowSize);
|
temporalWindowSize, templateWindowSize, searchWindowSize);
|
||||||
|
|
||||||
|
int hn = (int)h.size();
|
||||||
|
int type = srcImgs[0].type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||||
|
CV_Assert(hn == 1 || hn == cn);
|
||||||
|
|
||||||
_dst.create(srcImgs[0].size(), srcImgs[0].type());
|
_dst.create(srcImgs[0].size(), srcImgs[0].type());
|
||||||
Mat dst = _dst.getMat();
|
Mat dst = _dst.getMat();
|
||||||
|
|
||||||
switch (srcImgs[0].type())
|
switch (normType) {
|
||||||
{
|
case NORM_L2:
|
||||||
case CV_8U:
|
switch (depth) {
|
||||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
case CV_8U:
|
||||||
FastNlMeansMultiDenoisingInvoker<uchar>(
|
fastNlMeansDenoisingMulti_<uchar, int, unsigned,
|
||||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
DistSquared>(srcImgs, dst,
|
||||||
dst, templateWindowSize, searchWindowSize, h));
|
imgToDenoiseIndex, temporalWindowSize,
|
||||||
|
h,
|
||||||
|
templateWindowSize, searchWindowSize);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
CV_Error(Error::StsBadArg,
|
||||||
|
"Unsupported depth! Only CV_8U is supported for NORM_L2");
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case CV_8UC2:
|
case NORM_L1:
|
||||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
switch (depth) {
|
||||||
FastNlMeansMultiDenoisingInvoker<cv::Vec2b>(
|
case CV_8U:
|
||||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
fastNlMeansDenoisingMulti_<uchar, int, unsigned,
|
||||||
dst, templateWindowSize, searchWindowSize, h));
|
DistAbs>(srcImgs, dst,
|
||||||
break;
|
imgToDenoiseIndex, temporalWindowSize,
|
||||||
case CV_8UC3:
|
h,
|
||||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
templateWindowSize, searchWindowSize);
|
||||||
FastNlMeansMultiDenoisingInvoker<cv::Vec3b>(
|
break;
|
||||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
case CV_16U:
|
||||||
dst, templateWindowSize, searchWindowSize, h));
|
fastNlMeansDenoisingMulti_<ushort, int64, uint64,
|
||||||
|
DistAbs>(srcImgs, dst,
|
||||||
|
imgToDenoiseIndex, temporalWindowSize,
|
||||||
|
h,
|
||||||
|
templateWindowSize, searchWindowSize);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
CV_Error(Error::StsBadArg,
|
||||||
|
"Unsupported depth! Only CV_8U and CV_16U are supported for NORM_L1");
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
CV_Error(Error::StsBadArg,
|
CV_Error(Error::StsBadArg,
|
||||||
"Unsupported matrix format! Only uchar, Vec2b, Vec3b are supported");
|
"Unsupported norm type! Only NORM_L2 and NORM_L1 are supported");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -212,9 +372,10 @@ void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputAr
|
|||||||
_dst.create(srcImgs[0].size(), srcImgs[0].type());
|
_dst.create(srcImgs[0].size(), srcImgs[0].type());
|
||||||
Mat dst = _dst.getMat();
|
Mat dst = _dst.getMat();
|
||||||
|
|
||||||
|
int type = srcImgs[0].type(), depth = CV_MAT_DEPTH(type);
|
||||||
int src_imgs_size = static_cast<int>(srcImgs.size());
|
int src_imgs_size = static_cast<int>(srcImgs.size());
|
||||||
|
|
||||||
if (srcImgs[0].type() != CV_8UC3)
|
if (type != CV_8UC3)
|
||||||
{
|
{
|
||||||
CV_Error(Error::StsBadArg, "Type of input images should be CV_8UC3!");
|
CV_Error(Error::StsBadArg, "Type of input images should be CV_8UC3!");
|
||||||
return;
|
return;
|
||||||
@ -228,9 +389,9 @@ void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputAr
|
|||||||
std::vector<Mat> ab(src_imgs_size);
|
std::vector<Mat> ab(src_imgs_size);
|
||||||
for (int i = 0; i < src_imgs_size; i++)
|
for (int i = 0; i < src_imgs_size; i++)
|
||||||
{
|
{
|
||||||
src_lab[i] = Mat::zeros(srcImgs[0].size(), CV_8UC3);
|
src_lab[i] = Mat::zeros(srcImgs[0].size(), type);
|
||||||
l[i] = Mat::zeros(srcImgs[0].size(), CV_8UC1);
|
l[i] = Mat::zeros(srcImgs[0].size(), CV_MAKE_TYPE(depth, 1));
|
||||||
ab[i] = Mat::zeros(srcImgs[0].size(), CV_8UC2);
|
ab[i] = Mat::zeros(srcImgs[0].size(), CV_MAKE_TYPE(depth, 2));
|
||||||
cvtColor(srcImgs[i], src_lab[i], COLOR_LBGR2Lab);
|
cvtColor(srcImgs[i], src_lab[i], COLOR_LBGR2Lab);
|
||||||
|
|
||||||
Mat l_ab[] = { l[i], ab[i] };
|
Mat l_ab[] = { l[i], ab[i] };
|
||||||
|
@ -50,13 +50,13 @@
|
|||||||
|
|
||||||
using namespace cv;
|
using namespace cv;
|
||||||
|
|
||||||
template <typename T>
|
template <typename T, typename IT, typename UIT, typename D, typename WT>
|
||||||
struct FastNlMeansDenoisingInvoker :
|
struct FastNlMeansDenoisingInvoker :
|
||||||
public ParallelLoopBody
|
public ParallelLoopBody
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
FastNlMeansDenoisingInvoker(const Mat& src, Mat& dst,
|
FastNlMeansDenoisingInvoker(const Mat& src, Mat& dst,
|
||||||
int template_window_size, int search_window_size, const float h);
|
int template_window_size, int search_window_size, const float *h);
|
||||||
|
|
||||||
void operator() (const Range& range) const;
|
void operator() (const Range& range) const;
|
||||||
|
|
||||||
@ -75,9 +75,9 @@ private:
|
|||||||
int template_window_half_size_;
|
int template_window_half_size_;
|
||||||
int search_window_half_size_;
|
int search_window_half_size_;
|
||||||
|
|
||||||
int fixed_point_mult_;
|
typename pixelInfo<WT>::sampleType fixed_point_mult_;
|
||||||
int almost_template_window_size_sq_bin_shift_;
|
int almost_template_window_size_sq_bin_shift_;
|
||||||
std::vector<int> almost_dist2weight_;
|
std::vector<WT> almost_dist2weight_;
|
||||||
|
|
||||||
void calcDistSumsForFirstElementInRow(
|
void calcDistSumsForFirstElementInRow(
|
||||||
int i, Array2d<int>& dist_sums,
|
int i, Array2d<int>& dist_sums,
|
||||||
@ -99,15 +99,15 @@ inline int getNearestPowerOf2(int value)
|
|||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <typename T, typename IT, typename UIT, typename D, typename WT>
|
||||||
FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
|
FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::FastNlMeansDenoisingInvoker(
|
||||||
const Mat& src, Mat& dst,
|
const Mat& src, Mat& dst,
|
||||||
int template_window_size,
|
int template_window_size,
|
||||||
int search_window_size,
|
int search_window_size,
|
||||||
const float h) :
|
const float *h) :
|
||||||
src_(src), dst_(dst)
|
src_(src), dst_(dst)
|
||||||
{
|
{
|
||||||
CV_Assert(src.channels() == sizeof(T)); //T is Vec1b or Vec2b or Vec3b
|
CV_Assert(src.channels() == pixelInfo<T>::channels);
|
||||||
|
|
||||||
template_window_half_size_ = template_window_size / 2;
|
template_window_half_size_ = template_window_size / 2;
|
||||||
search_window_half_size_ = search_window_size / 2;
|
search_window_half_size_ = search_window_size / 2;
|
||||||
@ -117,8 +117,10 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
|
|||||||
border_size_ = search_window_half_size_ + template_window_half_size_;
|
border_size_ = search_window_half_size_ + template_window_half_size_;
|
||||||
copyMakeBorder(src_, extended_src_, border_size_, border_size_, border_size_, border_size_, BORDER_DEFAULT);
|
copyMakeBorder(src_, extended_src_, border_size_, border_size_, border_size_, border_size_, BORDER_DEFAULT);
|
||||||
|
|
||||||
const int max_estimate_sum_value = search_window_size_ * search_window_size_ * 255;
|
const IT max_estimate_sum_value =
|
||||||
fixed_point_mult_ = std::numeric_limits<int>::max() / max_estimate_sum_value;
|
(IT)search_window_size_ * (IT)search_window_size_ * (IT)pixelInfo<T>::sampleMax();
|
||||||
|
fixed_point_mult_ = (int)std::min<IT>(std::numeric_limits<IT>::max() / max_estimate_sum_value,
|
||||||
|
pixelInfo<WT>::sampleMax());
|
||||||
|
|
||||||
// precalc weight for every possible l2 dist between blocks
|
// precalc weight for every possible l2 dist between blocks
|
||||||
// additional optimization of precalced weights to replace division(averaging) by binary shift
|
// additional optimization of precalced weights to replace division(averaging) by binary shift
|
||||||
@ -127,30 +129,24 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
|
|||||||
almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq);
|
almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq);
|
||||||
double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq;
|
double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq;
|
||||||
|
|
||||||
int max_dist = 255 * 255 * sizeof(T);
|
int max_dist = D::template maxDist<T>();
|
||||||
int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1);
|
int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1);
|
||||||
almost_dist2weight_.resize(almost_max_dist);
|
almost_dist2weight_.resize(almost_max_dist);
|
||||||
|
|
||||||
const double WEIGHT_THRESHOLD = 0.001;
|
|
||||||
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
|
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
|
||||||
{
|
{
|
||||||
double dist = almost_dist * almost_dist2actual_dist_multiplier;
|
double dist = almost_dist * almost_dist2actual_dist_multiplier;
|
||||||
int weight = cvRound(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T))));
|
almost_dist2weight_[almost_dist] =
|
||||||
|
D::template calcWeight<T, WT>(dist, h, fixed_point_mult_);
|
||||||
if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
|
|
||||||
weight = 0;
|
|
||||||
|
|
||||||
almost_dist2weight_[almost_dist] = weight;
|
|
||||||
}
|
}
|
||||||
CV_Assert(almost_dist2weight_[0] == fixed_point_mult_);
|
|
||||||
|
|
||||||
// additional optimization init end
|
// additional optimization init end
|
||||||
if (dst_.empty())
|
if (dst_.empty())
|
||||||
dst_ = Mat::zeros(src_.size(), src_.type());
|
dst_ = Mat::zeros(src_.size(), src_.type());
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <typename T, typename IT, typename UIT, typename D, typename WT>
|
||||||
void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
|
void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::operator() (const Range& range) const
|
||||||
{
|
{
|
||||||
int row_from = range.start;
|
int row_from = range.start;
|
||||||
int row_to = range.end - 1;
|
int row_to = range.end - 1;
|
||||||
@ -215,7 +211,7 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
|
|||||||
dist_sums_row[x] -= col_dist_sums_row[x];
|
dist_sums_row[x] -= col_dist_sums_row[x];
|
||||||
|
|
||||||
int bx = start_bx + x;
|
int bx = start_bx + x;
|
||||||
col_dist_sums_row[x] = up_col_dist_sums_row[x] + calcUpDownDist(a_up, a_down, b_up_ptr[bx], b_down_ptr[bx]);
|
col_dist_sums_row[x] = up_col_dist_sums_row[x] + D::template calcUpDownDist<T>(a_up, a_down, b_up_ptr[bx], b_down_ptr[bx]);
|
||||||
|
|
||||||
dist_sums_row[x] += col_dist_sums_row[x];
|
dist_sums_row[x] += col_dist_sums_row[x];
|
||||||
up_col_dist_sums_row[x] = col_dist_sums_row[x];
|
up_col_dist_sums_row[x] = col_dist_sums_row[x];
|
||||||
@ -227,9 +223,11 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
|
|||||||
}
|
}
|
||||||
|
|
||||||
// calc weights
|
// calc weights
|
||||||
int estimation[3], weights_sum = 0;
|
IT estimation[pixelInfo<T>::channels], weights_sum[pixelInfo<WT>::channels];
|
||||||
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
|
for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
|
||||||
estimation[channel_num] = 0;
|
estimation[channel_num] = 0;
|
||||||
|
for (size_t channel_num = 0; channel_num < pixelInfo<WT>::channels; channel_num++)
|
||||||
|
weights_sum[channel_num] = 0;
|
||||||
|
|
||||||
for (int y = 0; y < search_window_size_; y++)
|
for (int y = 0; y < search_window_size_; y++)
|
||||||
{
|
{
|
||||||
@ -238,24 +236,21 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
|
|||||||
for (int x = 0; x < search_window_size_; x++)
|
for (int x = 0; x < search_window_size_; x++)
|
||||||
{
|
{
|
||||||
int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_;
|
int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_;
|
||||||
int weight = almost_dist2weight_[almostAvgDist];
|
WT weight = almost_dist2weight_[almostAvgDist];
|
||||||
weights_sum += weight;
|
|
||||||
|
|
||||||
T p = cur_row_ptr[border_size_ + search_window_x + x];
|
T p = cur_row_ptr[border_size_ + search_window_x + x];
|
||||||
incWithWeight(estimation, weight, p);
|
incWithWeight<T, IT, WT>(estimation, weights_sum, weight, p);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
|
divByWeightsSum<IT, UIT, pixelInfo<T>::channels, pixelInfo<WT>::channels>(estimation,
|
||||||
estimation[channel_num] = ((unsigned)estimation[channel_num] + weights_sum/2) / weights_sum;
|
weights_sum);
|
||||||
|
dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
|
||||||
dst_.at<T>(i,j) = saturateCastFromArray<T>(estimation);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <typename T, typename IT, typename UIT, typename D, typename WT>
|
||||||
inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
|
inline void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForFirstElementInRow(
|
||||||
int i,
|
int i,
|
||||||
Array2d<int>& dist_sums,
|
Array2d<int>& dist_sums,
|
||||||
Array3d<int>& col_dist_sums,
|
Array3d<int>& col_dist_sums,
|
||||||
@ -276,7 +271,7 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
|
|||||||
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
|
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
|
||||||
for (int tx = -template_window_half_size_; tx <= template_window_half_size_; tx++)
|
for (int tx = -template_window_half_size_; tx <= template_window_half_size_; tx++)
|
||||||
{
|
{
|
||||||
int dist = calcDist<T>(extended_src_,
|
int dist = D::template calcDist<T>(extended_src_,
|
||||||
border_size_ + i + ty, border_size_ + j + tx,
|
border_size_ + i + ty, border_size_ + j + tx,
|
||||||
border_size_ + start_y + ty, border_size_ + start_x + tx);
|
border_size_ + start_y + ty, border_size_ + start_x + tx);
|
||||||
|
|
||||||
@ -288,8 +283,8 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <typename T, typename IT, typename UIT, typename D, typename WT>
|
||||||
inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForElementInFirstRow(
|
inline void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForElementInFirstRow(
|
||||||
int i, int j, int first_col_num,
|
int i, int j, int first_col_num,
|
||||||
Array2d<int>& dist_sums,
|
Array2d<int>& dist_sums,
|
||||||
Array3d<int>& col_dist_sums,
|
Array3d<int>& col_dist_sums,
|
||||||
@ -312,7 +307,7 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForElementInFirstRow(
|
|||||||
int by = start_by + y;
|
int by = start_by + y;
|
||||||
int bx = start_bx + x;
|
int bx = start_bx + x;
|
||||||
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
|
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
|
||||||
col_dist_sums[new_last_col_num][y][x] += calcDist<T>(extended_src_, ay + ty, ax, by + ty, bx);
|
col_dist_sums[new_last_col_num][y][x] += D::template calcDist<T>(extended_src_, ay + ty, ax, by + ty, bx);
|
||||||
|
|
||||||
dist_sums[y][x] += col_dist_sums[new_last_col_num][y][x];
|
dist_sums[y][x] += col_dist_sums[new_last_col_num][y][x];
|
||||||
up_col_dist_sums[j][y][x] = col_dist_sums[new_last_col_num][y][x];
|
up_col_dist_sums[j][y][x] = col_dist_sums[new_last_col_num][y][x];
|
||||||
|
@ -44,118 +44,438 @@
|
|||||||
|
|
||||||
using namespace cv;
|
using namespace cv;
|
||||||
|
|
||||||
template <typename T> static inline int calcDist(const T a, const T b);
|
template <typename T> struct pixelInfo_
|
||||||
|
|
||||||
template <> inline int calcDist(const uchar a, const uchar b)
|
|
||||||
{
|
{
|
||||||
return (a-b) * (a-b);
|
static const int channels = 1;
|
||||||
|
typedef T sampleType;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename ET, int n> struct pixelInfo_<Vec<ET, n> >
|
||||||
|
{
|
||||||
|
static const int channels = n;
|
||||||
|
typedef ET sampleType;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct pixelInfo: public pixelInfo_<T>
|
||||||
|
{
|
||||||
|
using typename pixelInfo_<T>::sampleType;
|
||||||
|
|
||||||
|
static inline sampleType sampleMax()
|
||||||
|
{
|
||||||
|
return std::numeric_limits<sampleType>::max();
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline sampleType sampleMin()
|
||||||
|
{
|
||||||
|
return std::numeric_limits<sampleType>::min();
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline size_t sampleBytes()
|
||||||
|
{
|
||||||
|
return sizeof(sampleType);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline size_t sampleBits()
|
||||||
|
{
|
||||||
|
return 8*sampleBytes();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class DistAbs
|
||||||
|
{
|
||||||
|
template <typename T> struct calcDist_
|
||||||
|
{
|
||||||
|
static inline int f(const T a, const T b)
|
||||||
|
{
|
||||||
|
return std::abs((int)(a-b));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename ET> struct calcDist_<Vec<ET, 2> >
|
||||||
|
{
|
||||||
|
static inline int f(const Vec<ET, 2> a, const Vec<ET, 2> b)
|
||||||
|
{
|
||||||
|
return std::abs((int)(a[0]-b[0])) + std::abs((int)(a[1]-b[1]));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename ET> struct calcDist_<Vec<ET, 3> >
|
||||||
|
{
|
||||||
|
static inline int f(const Vec<ET, 3> a, const Vec<ET, 3> b)
|
||||||
|
{
|
||||||
|
return
|
||||||
|
std::abs((int)(a[0]-b[0])) +
|
||||||
|
std::abs((int)(a[1]-b[1])) +
|
||||||
|
std::abs((int)(a[2]-b[2]));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename ET> struct calcDist_<Vec<ET, 4> >
|
||||||
|
{
|
||||||
|
static inline int f(const Vec<ET, 4> a, const Vec<ET, 4> b)
|
||||||
|
{
|
||||||
|
return
|
||||||
|
std::abs((int)(a[0]-b[0])) +
|
||||||
|
std::abs((int)(a[1]-b[1])) +
|
||||||
|
std::abs((int)(a[2]-b[2])) +
|
||||||
|
std::abs((int)(a[3]-b[3]));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T, typename WT> struct calcWeight_
|
||||||
|
{
|
||||||
|
static inline WT f(double dist, const float *h, WT fixed_point_mult)
|
||||||
|
{
|
||||||
|
double w = std::exp(-dist*dist / (h[0]*h[0] * pixelInfo<T>::channels));
|
||||||
|
if (std::isnan(w)) w = 1.0; // Handle h = 0.0
|
||||||
|
|
||||||
|
static const double WEIGHT_THRESHOLD = 0.001;
|
||||||
|
WT weight = (WT)round(fixed_point_mult * w);
|
||||||
|
if (weight < WEIGHT_THRESHOLD * fixed_point_mult) weight = 0;
|
||||||
|
|
||||||
|
return weight;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T, typename ET, int n> struct calcWeight_<T, Vec<ET, n> >
|
||||||
|
{
|
||||||
|
static inline Vec<ET, n> f(double dist, const float *h, ET fixed_point_mult)
|
||||||
|
{
|
||||||
|
Vec<ET, n> res;
|
||||||
|
for (int i=0; i<n; i++)
|
||||||
|
res[i] = calcWeight<T, ET>(dist, &h[i], fixed_point_mult);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
public:
|
||||||
|
template <typename T> static inline int calcDist(const T a, const T b)
|
||||||
|
{
|
||||||
|
return calcDist_<T>::f(a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static inline int calcDist(const Mat& m, int i1, int j1, int i2, int j2)
|
||||||
|
{
|
||||||
|
const T a = m.at<T>(i1, j1);
|
||||||
|
const T b = m.at<T>(i2, j2);
|
||||||
|
return calcDist<T>(a,b);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static inline int calcUpDownDist(T a_up, T a_down, T b_up, T b_down)
|
||||||
|
{
|
||||||
|
return calcDist<T>(a_down, b_down) - calcDist<T>(a_up, b_up);
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T, typename WT>
|
||||||
|
static inline WT calcWeight(double dist, const float *h,
|
||||||
|
typename pixelInfo<WT>::sampleType fixed_point_mult)
|
||||||
|
{
|
||||||
|
return calcWeight_<T, WT>::f(dist, h, fixed_point_mult);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static inline int maxDist()
|
||||||
|
{
|
||||||
|
return (int)pixelInfo<T>::sampleMax() * pixelInfo<T>::channels;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class DistSquared
|
||||||
|
{
|
||||||
|
template <typename T> struct calcDist_
|
||||||
|
{
|
||||||
|
static inline int f(const T a, const T b)
|
||||||
|
{
|
||||||
|
return (int)(a-b) * (int)(a-b);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename ET> struct calcDist_<Vec<ET, 2> >
|
||||||
|
{
|
||||||
|
static inline int f(const Vec<ET, 2> a, const Vec<ET, 2> b)
|
||||||
|
{
|
||||||
|
return (int)(a[0]-b[0])*(int)(a[0]-b[0]) + (int)(a[1]-b[1])*(int)(a[1]-b[1]);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename ET> struct calcDist_<Vec<ET, 3> >
|
||||||
|
{
|
||||||
|
static inline int f(const Vec<ET, 3> a, const Vec<ET, 3> b)
|
||||||
|
{
|
||||||
|
return
|
||||||
|
(int)(a[0]-b[0])*(int)(a[0]-b[0]) +
|
||||||
|
(int)(a[1]-b[1])*(int)(a[1]-b[1]) +
|
||||||
|
(int)(a[2]-b[2])*(int)(a[2]-b[2]);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename ET> struct calcDist_<Vec<ET, 4> >
|
||||||
|
{
|
||||||
|
static inline int f(const Vec<ET, 4> a, const Vec<ET, 4> b)
|
||||||
|
{
|
||||||
|
return
|
||||||
|
(int)(a[0]-b[0])*(int)(a[0]-b[0]) +
|
||||||
|
(int)(a[1]-b[1])*(int)(a[1]-b[1]) +
|
||||||
|
(int)(a[2]-b[2])*(int)(a[2]-b[2]) +
|
||||||
|
(int)(a[3]-b[3])*(int)(a[3]-b[3]);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct calcUpDownDist_
|
||||||
|
{
|
||||||
|
static inline int f(T a_up, T a_down, T b_up, T b_down)
|
||||||
|
{
|
||||||
|
int A = a_down - b_down;
|
||||||
|
int B = a_up - b_up;
|
||||||
|
return (A-B)*(A+B);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename ET, int n> struct calcUpDownDist_<Vec<ET, n> >
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
typedef Vec<ET, n> T;
|
||||||
|
public:
|
||||||
|
static inline int f(T a_up, T a_down, T b_up, T b_down)
|
||||||
|
{
|
||||||
|
return calcDist<T>(a_down, b_down) - calcDist<T>(a_up, b_up);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T, typename WT> struct calcWeight_
|
||||||
|
{
|
||||||
|
static inline WT f(double dist, const float *h, WT fixed_point_mult)
|
||||||
|
{
|
||||||
|
double w = std::exp(-dist / (h[0]*h[0] * pixelInfo<T>::channels));
|
||||||
|
if (std::isnan(w)) w = 1.0; // Handle h = 0.0
|
||||||
|
|
||||||
|
static const double WEIGHT_THRESHOLD = 0.001;
|
||||||
|
WT weight = (WT)round(fixed_point_mult * w);
|
||||||
|
if (weight < WEIGHT_THRESHOLD * fixed_point_mult) weight = 0;
|
||||||
|
|
||||||
|
return weight;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T, typename ET, int n> struct calcWeight_<T, Vec<ET, n> >
|
||||||
|
{
|
||||||
|
static inline Vec<ET, n> f(double dist, const float *h, ET fixed_point_mult)
|
||||||
|
{
|
||||||
|
Vec<ET, n> res;
|
||||||
|
for (int i=0; i<n; i++)
|
||||||
|
res[i] = calcWeight<T, ET>(dist, &h[i], fixed_point_mult);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
public:
|
||||||
|
template <typename T> static inline int calcDist(const T a, const T b)
|
||||||
|
{
|
||||||
|
return calcDist_<T>::f(a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static inline int calcDist(const Mat& m, int i1, int j1, int i2, int j2)
|
||||||
|
{
|
||||||
|
const T a = m.at<T>(i1, j1);
|
||||||
|
const T b = m.at<T>(i2, j2);
|
||||||
|
return calcDist<T>(a,b);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static inline int calcUpDownDist(T a_up, T a_down, T b_up, T b_down)
|
||||||
|
{
|
||||||
|
return calcUpDownDist_<T>::f(a_up, a_down, b_up, b_down);
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T, typename WT>
|
||||||
|
static inline WT calcWeight(double dist, const float *h,
|
||||||
|
typename pixelInfo<WT>::sampleType fixed_point_mult)
|
||||||
|
{
|
||||||
|
return calcWeight_<T, WT>::f(dist, h, fixed_point_mult);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static inline int maxDist()
|
||||||
|
{
|
||||||
|
return (int)pixelInfo<T>::sampleMax() * (int)pixelInfo<T>::sampleMax() *
|
||||||
|
pixelInfo<T>::channels;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T, typename IT, typename WT> struct incWithWeight_
|
||||||
|
{
|
||||||
|
static inline void f(IT* estimation, IT* weights_sum, WT weight, T p)
|
||||||
|
{
|
||||||
|
estimation[0] += (IT)weight * p;
|
||||||
|
weights_sum[0] += (IT)weight;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename ET, typename IT, typename WT> struct incWithWeight_<Vec<ET, 2>, IT, WT>
|
||||||
|
{
|
||||||
|
static inline void f(IT* estimation, IT* weights_sum, WT weight, Vec<ET, 2> p)
|
||||||
|
{
|
||||||
|
estimation[0] += (IT)weight * p[0];
|
||||||
|
estimation[1] += (IT)weight * p[1];
|
||||||
|
weights_sum[0] += (IT)weight;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename ET, typename IT, typename WT> struct incWithWeight_<Vec<ET, 3>, IT, WT>
|
||||||
|
{
|
||||||
|
static inline void f(IT* estimation, IT* weights_sum, WT weight, Vec<ET, 3> p)
|
||||||
|
{
|
||||||
|
estimation[0] += (IT)weight * p[0];
|
||||||
|
estimation[1] += (IT)weight * p[1];
|
||||||
|
estimation[2] += (IT)weight * p[2];
|
||||||
|
weights_sum[0] += (IT)weight;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename ET, typename IT, typename WT> struct incWithWeight_<Vec<ET, 4>, IT, WT>
|
||||||
|
{
|
||||||
|
static inline void f(IT* estimation, IT* weights_sum, WT weight, Vec<ET, 4> p)
|
||||||
|
{
|
||||||
|
estimation[0] += (IT)weight * p[0];
|
||||||
|
estimation[1] += (IT)weight * p[1];
|
||||||
|
estimation[2] += (IT)weight * p[2];
|
||||||
|
estimation[3] += (IT)weight * p[3];
|
||||||
|
weights_sum[0] += (IT)weight;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename ET, typename IT, typename EW> struct incWithWeight_<Vec<ET, 2>, IT, Vec<EW, 2> >
|
||||||
|
{
|
||||||
|
static inline void f(IT* estimation, IT* weights_sum, Vec<EW, 2> weight, Vec<ET, 2> p)
|
||||||
|
{
|
||||||
|
estimation[0] += (IT)weight[0] * p[0];
|
||||||
|
estimation[1] += (IT)weight[1] * p[1];
|
||||||
|
weights_sum[0] += (IT)weight[0];
|
||||||
|
weights_sum[1] += (IT)weight[1];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename ET, typename IT, typename EW> struct incWithWeight_<Vec<ET, 3>, IT, Vec<EW, 3> >
|
||||||
|
{
|
||||||
|
static inline void f(IT* estimation, IT* weights_sum, Vec<EW, 3> weight, Vec<ET, 3> p)
|
||||||
|
{
|
||||||
|
estimation[0] += (IT)weight[0] * p[0];
|
||||||
|
estimation[1] += (IT)weight[1] * p[1];
|
||||||
|
estimation[2] += (IT)weight[2] * p[2];
|
||||||
|
weights_sum[0] += (IT)weight[0];
|
||||||
|
weights_sum[1] += (IT)weight[1];
|
||||||
|
weights_sum[2] += (IT)weight[2];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename ET, typename IT, typename EW> struct incWithWeight_<Vec<ET, 4>, IT, Vec<EW, 4> >
|
||||||
|
{
|
||||||
|
static inline void f(IT* estimation, IT* weights_sum, Vec<EW, 4> weight, Vec<ET, 4> p)
|
||||||
|
{
|
||||||
|
estimation[0] += (IT)weight[0] * p[0];
|
||||||
|
estimation[1] += (IT)weight[1] * p[1];
|
||||||
|
estimation[2] += (IT)weight[2] * p[2];
|
||||||
|
estimation[3] += (IT)weight[3] * p[3];
|
||||||
|
weights_sum[0] += (IT)weight[0];
|
||||||
|
weights_sum[1] += (IT)weight[1];
|
||||||
|
weights_sum[2] += (IT)weight[2];
|
||||||
|
weights_sum[3] += (IT)weight[3];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T, typename IT, typename WT>
|
||||||
|
static inline void incWithWeight(IT* estimation, IT* weights_sum, WT weight, T p)
|
||||||
|
{
|
||||||
|
return incWithWeight_<T, IT, WT>::f(estimation, weights_sum, weight, p);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <> inline int calcDist(const Vec2b a, const Vec2b b)
|
template <typename IT, typename UIT, int nc, int nw> struct divByWeightsSum_
|
||||||
{
|
{
|
||||||
return (a[0]-b[0])*(a[0]-b[0]) + (a[1]-b[1])*(a[1]-b[1]);
|
static inline void f(IT* estimation, IT* weights_sum);
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename IT, typename UIT> struct divByWeightsSum_<IT, UIT, 1, 1>
|
||||||
|
{
|
||||||
|
static inline void f(IT* estimation, IT* weights_sum)
|
||||||
|
{
|
||||||
|
estimation[0] = (static_cast<UIT>(estimation[0]) + weights_sum[0]/2) / weights_sum[0];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename IT, typename UIT, int n> struct divByWeightsSum_<IT, UIT, n, 1>
|
||||||
|
{
|
||||||
|
static inline void f(IT* estimation, IT* weights_sum)
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < n; i++)
|
||||||
|
estimation[i] = (static_cast<UIT>(estimation[i]) + weights_sum[0]/2) / weights_sum[0];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename IT, typename UIT, int n> struct divByWeightsSum_<IT, UIT, n, n>
|
||||||
|
{
|
||||||
|
static inline void f(IT* estimation, IT* weights_sum)
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < n; i++)
|
||||||
|
estimation[i] = (static_cast<UIT>(estimation[i]) + weights_sum[i]/2) / weights_sum[i];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename IT, typename UIT, int nc, int nw>
|
||||||
|
static inline void divByWeightsSum(IT* estimation, IT* weights_sum)
|
||||||
|
{
|
||||||
|
return divByWeightsSum_<IT, UIT, nc, nw>::f(estimation, weights_sum);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <> inline int calcDist(const Vec3b a, const Vec3b b)
|
template <typename T, typename IT> struct saturateCastFromArray_
|
||||||
{
|
{
|
||||||
return (a[0]-b[0])*(a[0]-b[0]) + (a[1]-b[1])*(a[1]-b[1]) + (a[2]-b[2])*(a[2]-b[2]);
|
static inline T f(IT* estimation)
|
||||||
}
|
{
|
||||||
|
return saturate_cast<T>(estimation[0]);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
template <typename T> static inline int calcDist(const Mat& m, int i1, int j1, int i2, int j2)
|
template <typename ET, typename IT> struct saturateCastFromArray_<Vec<ET, 2>, IT>
|
||||||
{
|
{
|
||||||
const T a = m.at<T>(i1, j1);
|
static inline Vec<ET, 2> f(IT* estimation)
|
||||||
const T b = m.at<T>(i2, j2);
|
{
|
||||||
return calcDist<T>(a,b);
|
Vec<ET, 2> res;
|
||||||
}
|
res[0] = saturate_cast<ET>(estimation[0]);
|
||||||
|
res[1] = saturate_cast<ET>(estimation[1]);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
template <typename T> static inline int calcUpDownDist(T a_up, T a_down, T b_up, T b_down)
|
template <typename ET, typename IT> struct saturateCastFromArray_<Vec<ET, 3>, IT>
|
||||||
{
|
{
|
||||||
return calcDist(a_down, b_down) - calcDist(a_up, b_up);
|
static inline Vec<ET, 3> f(IT* estimation)
|
||||||
}
|
{
|
||||||
|
Vec<ET, 3> res;
|
||||||
|
res[0] = saturate_cast<ET>(estimation[0]);
|
||||||
|
res[1] = saturate_cast<ET>(estimation[1]);
|
||||||
|
res[2] = saturate_cast<ET>(estimation[2]);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
template <> inline int calcUpDownDist(uchar a_up, uchar a_down, uchar b_up, uchar b_down)
|
template <typename ET, typename IT> struct saturateCastFromArray_<Vec<ET, 4>, IT>
|
||||||
{
|
{
|
||||||
int A = a_down - b_down;
|
static inline Vec<ET, 4> f(IT* estimation)
|
||||||
int B = a_up - b_up;
|
{
|
||||||
return (A-B)*(A+B);
|
Vec<ET, 4> res;
|
||||||
}
|
res[0] = saturate_cast<ET>(estimation[0]);
|
||||||
|
res[1] = saturate_cast<ET>(estimation[1]);
|
||||||
|
res[2] = saturate_cast<ET>(estimation[2]);
|
||||||
|
res[3] = saturate_cast<ET>(estimation[3]);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
template <typename T> static inline void incWithWeight(int* estimation, int weight, T p);
|
template <typename T, typename IT> static inline T saturateCastFromArray(IT* estimation)
|
||||||
|
|
||||||
template <> inline void incWithWeight(int* estimation, int weight, uchar p)
|
|
||||||
{
|
{
|
||||||
estimation[0] += weight * p;
|
return saturateCastFromArray_<T, IT>::f(estimation);
|
||||||
}
|
|
||||||
|
|
||||||
template <> inline void incWithWeight(int* estimation, int weight, Vec2b p)
|
|
||||||
{
|
|
||||||
estimation[0] += weight * p[0];
|
|
||||||
estimation[1] += weight * p[1];
|
|
||||||
}
|
|
||||||
|
|
||||||
template <> inline void incWithWeight(int* estimation, int weight, Vec3b p)
|
|
||||||
{
|
|
||||||
estimation[0] += weight * p[0];
|
|
||||||
estimation[1] += weight * p[1];
|
|
||||||
estimation[2] += weight * p[2];
|
|
||||||
}
|
|
||||||
|
|
||||||
template <> inline void incWithWeight(int* estimation, int weight, int p)
|
|
||||||
{
|
|
||||||
estimation[0] += weight * p;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <> inline void incWithWeight(int* estimation, int weight, Vec2i p)
|
|
||||||
{
|
|
||||||
estimation[0] += weight * p[0];
|
|
||||||
estimation[1] += weight * p[1];
|
|
||||||
}
|
|
||||||
|
|
||||||
template <> inline void incWithWeight(int* estimation, int weight, Vec3i p)
|
|
||||||
{
|
|
||||||
estimation[0] += weight * p[0];
|
|
||||||
estimation[1] += weight * p[1];
|
|
||||||
estimation[2] += weight * p[2];
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T> static inline T saturateCastFromArray(int* estimation);
|
|
||||||
|
|
||||||
template <> inline uchar saturateCastFromArray(int* estimation)
|
|
||||||
{
|
|
||||||
return saturate_cast<uchar>(estimation[0]);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <> inline Vec2b saturateCastFromArray(int* estimation)
|
|
||||||
{
|
|
||||||
Vec2b res;
|
|
||||||
res[0] = saturate_cast<uchar>(estimation[0]);
|
|
||||||
res[1] = saturate_cast<uchar>(estimation[1]);
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <> inline Vec3b saturateCastFromArray(int* estimation)
|
|
||||||
{
|
|
||||||
Vec3b res;
|
|
||||||
res[0] = saturate_cast<uchar>(estimation[0]);
|
|
||||||
res[1] = saturate_cast<uchar>(estimation[1]);
|
|
||||||
res[2] = saturate_cast<uchar>(estimation[2]);
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <> inline int saturateCastFromArray(int* estimation)
|
|
||||||
{
|
|
||||||
return estimation[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
template <> inline Vec2i saturateCastFromArray(int* estimation)
|
|
||||||
{
|
|
||||||
estimation[1] = 0;
|
|
||||||
return Vec2i(estimation);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <> inline Vec3i saturateCastFromArray(int* estimation)
|
|
||||||
{
|
|
||||||
return Vec3i(estimation);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -28,12 +28,16 @@ static int divUp(int a, int b)
|
|||||||
return (a + b - 1) / b;
|
return (a + b - 1) / b;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename FT>
|
template <typename FT, typename ST, typename WT>
|
||||||
static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindowSize, int templateWindowSize, FT h, int cn,
|
static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight,
|
||||||
|
int searchWindowSize, int templateWindowSize,
|
||||||
|
const FT *h, int hn, int cn, int normType,
|
||||||
int & almostTemplateWindowSizeSqBinShift)
|
int & almostTemplateWindowSizeSqBinShift)
|
||||||
{
|
{
|
||||||
const int maxEstimateSumValue = searchWindowSize * searchWindowSize * 255;
|
const WT maxEstimateSumValue = searchWindowSize * searchWindowSize *
|
||||||
int fixedPointMult = std::numeric_limits<int>::max() / maxEstimateSumValue;
|
std::numeric_limits<ST>::max();
|
||||||
|
int fixedPointMult = (int)std::min<WT>(std::numeric_limits<WT>::max() / maxEstimateSumValue,
|
||||||
|
std::numeric_limits<int>::max());
|
||||||
int depth = DataType<FT>::depth;
|
int depth = DataType<FT>::depth;
|
||||||
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
|
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
|
||||||
|
|
||||||
@ -48,33 +52,44 @@ static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindow
|
|||||||
FT almostDist2ActualDistMultiplier = (FT)(1 << almostTemplateWindowSizeSqBinShift) / templateWindowSizeSq;
|
FT almostDist2ActualDistMultiplier = (FT)(1 << almostTemplateWindowSizeSqBinShift) / templateWindowSizeSq;
|
||||||
|
|
||||||
const FT WEIGHT_THRESHOLD = 1e-3f;
|
const FT WEIGHT_THRESHOLD = 1e-3f;
|
||||||
int maxDist = 255 * 255 * cn;
|
int maxDist = normType == NORM_L1 ? std::numeric_limits<ST>::max() * cn :
|
||||||
|
std::numeric_limits<ST>::max() * std::numeric_limits<ST>::max() * cn;
|
||||||
int almostMaxDist = (int)(maxDist / almostDist2ActualDistMultiplier + 1);
|
int almostMaxDist = (int)(maxDist / almostDist2ActualDistMultiplier + 1);
|
||||||
FT den = 1.0f / (h * h * cn);
|
FT den[4];
|
||||||
|
CV_Assert(hn > 0 && hn <= 4);
|
||||||
|
for (int i=0; i<hn; i++)
|
||||||
|
den[i] = 1.0f / (h[i] * h[i] * cn);
|
||||||
|
|
||||||
almostDist2Weight.create(1, almostMaxDist, CV_32SC1);
|
almostDist2Weight.create(1, almostMaxDist, CV_32SC(hn == 3 ? 4 : hn));
|
||||||
|
|
||||||
|
char buf[40];
|
||||||
ocl::Kernel k("calcAlmostDist2Weight", ocl::photo::nlmeans_oclsrc,
|
ocl::Kernel k("calcAlmostDist2Weight", ocl::photo::nlmeans_oclsrc,
|
||||||
format("-D OP_CALC_WEIGHTS -D FT=%s%s", ocl::typeToStr(depth),
|
format("-D OP_CALC_WEIGHTS -D FT=%s -D w_t=%s"
|
||||||
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
|
" -D wlut_t=%s -D convert_wlut_t=%s%s%s",
|
||||||
|
ocl::typeToStr(depth), ocl::typeToStr(CV_MAKE_TYPE(depth, hn)),
|
||||||
|
ocl::typeToStr(CV_32SC(hn)), ocl::convertTypeStr(depth, CV_32S, hn, buf),
|
||||||
|
doubleSupport ? " -D DOUBLE_SUPPORT" : "",
|
||||||
|
normType == NORM_L1 ? " -D ABS" : ""));
|
||||||
if (k.empty())
|
if (k.empty())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
k.args(ocl::KernelArg::PtrWriteOnly(almostDist2Weight), almostMaxDist,
|
k.args(ocl::KernelArg::PtrWriteOnly(almostDist2Weight), almostMaxDist,
|
||||||
almostDist2ActualDistMultiplier, fixedPointMult, den, WEIGHT_THRESHOLD);
|
almostDist2ActualDistMultiplier, fixedPointMult,
|
||||||
|
ocl::KernelArg::Constant(den, (hn == 3 ? 4 : hn)*sizeof(FT)), WEIGHT_THRESHOLD);
|
||||||
|
|
||||||
size_t globalsize[1] = { almostMaxDist };
|
size_t globalsize[1] = { almostMaxDist };
|
||||||
return k.run(1, globalsize, NULL, false);
|
return k.run(1, globalsize, NULL, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
|
static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, const float *h, int hn,
|
||||||
int templateWindowSize, int searchWindowSize)
|
int templateWindowSize, int searchWindowSize, int normType)
|
||||||
{
|
{
|
||||||
int type = _src.type(), cn = CV_MAT_CN(type);
|
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||||
int ctaSize = ocl::Device::getDefault().isIntel() ? CTA_SIZE_INTEL : CTA_SIZE_DEFAULT;
|
int ctaSize = ocl::Device::getDefault().isIntel() ? CTA_SIZE_INTEL : CTA_SIZE_DEFAULT;
|
||||||
Size size = _src.size();
|
Size size = _src.size();
|
||||||
|
|
||||||
if ( type != CV_8UC1 && type != CV_8UC2 && type != CV_8UC4 )
|
if (cn < 1 || cn > 4 || ((normType != NORM_L2 || depth != CV_8U) &&
|
||||||
|
(normType != NORM_L1 || (depth != CV_8U && depth != CV_16U))))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
int templateWindowHalfWize = templateWindowSize / 2;
|
int templateWindowHalfWize = templateWindowSize / 2;
|
||||||
@ -84,33 +99,68 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
|
|||||||
int nblocksx = divUp(size.width, BLOCK_COLS), nblocksy = divUp(size.height, BLOCK_ROWS);
|
int nblocksx = divUp(size.width, BLOCK_COLS), nblocksy = divUp(size.height, BLOCK_ROWS);
|
||||||
int almostTemplateWindowSizeSqBinShift = -1;
|
int almostTemplateWindowSizeSqBinShift = -1;
|
||||||
|
|
||||||
char cvt[2][40];
|
char buf[4][40];
|
||||||
String opts = format("-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d"
|
String opts = format("-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d"
|
||||||
" -D uchar_t=%s -D int_t=%s -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
|
" -D pixel_t=%s -D int_t=%s -D wlut_t=%s"
|
||||||
|
" -D weight_t=%s -D convert_weight_t=%s -D sum_t=%s -D convert_sum_t=%s"
|
||||||
|
" -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
|
||||||
" -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d"
|
" -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d"
|
||||||
" -D convert_int_t=%s -D cn=%d -D convert_uchar_t=%s",
|
" -D convert_int_t=%s -D cn=%d -D psz=%d -D convert_pixel_t=%s%s",
|
||||||
templateWindowSize, searchWindowSize, ocl::typeToStr(type),
|
templateWindowSize, searchWindowSize,
|
||||||
ocl::typeToStr(CV_32SC(cn)), BLOCK_COLS, BLOCK_ROWS, ctaSize,
|
ocl::typeToStr(type), ocl::typeToStr(CV_32SC(cn)),
|
||||||
templateWindowHalfWize, searchWindowHalfSize,
|
ocl::typeToStr(CV_32SC(hn)),
|
||||||
ocl::convertTypeStr(CV_8U, CV_32S, cn, cvt[0]), cn,
|
depth == CV_8U ? ocl::typeToStr(CV_32SC(hn)) :
|
||||||
ocl::convertTypeStr(CV_32S, CV_8U, cn, cvt[1]));
|
format("long%s", hn > 1 ? format("%d", hn).c_str() : "").c_str(),
|
||||||
|
depth == CV_8U ? ocl::convertTypeStr(CV_32S, CV_32S, hn, buf[0]) :
|
||||||
|
format("convert_long%s", hn > 1 ? format("%d", hn).c_str() : "").c_str(),
|
||||||
|
depth == CV_8U ? ocl::typeToStr(CV_32SC(cn)) :
|
||||||
|
format("long%s", cn > 1 ? format("%d", cn).c_str() : "").c_str(),
|
||||||
|
depth == CV_8U ? ocl::convertTypeStr(depth, CV_32S, cn, buf[1]) :
|
||||||
|
format("convert_long%s", cn > 1 ? format("%d", cn).c_str() : "").c_str(),
|
||||||
|
BLOCK_COLS, BLOCK_ROWS,
|
||||||
|
ctaSize, templateWindowHalfWize, searchWindowHalfSize,
|
||||||
|
ocl::convertTypeStr(depth, CV_32S, cn, buf[2]), cn,
|
||||||
|
(depth == CV_8U ? sizeof(uchar) : sizeof(ushort)) * (cn == 3 ? 4 : cn),
|
||||||
|
ocl::convertTypeStr(CV_32S, depth, cn, buf[3]),
|
||||||
|
normType == NORM_L1 ? " -D ABS" : "");
|
||||||
|
|
||||||
ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts);
|
ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts);
|
||||||
if (k.empty())
|
if (k.empty())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
UMat almostDist2Weight;
|
UMat almostDist2Weight;
|
||||||
if (!ocl_calcAlmostDist2Weight<float>(almostDist2Weight, searchWindowSize, templateWindowSize, h, cn,
|
if ((depth == CV_8U &&
|
||||||
almostTemplateWindowSizeSqBinShift))
|
!ocl_calcAlmostDist2Weight<float, uchar, int>(almostDist2Weight,
|
||||||
|
searchWindowSize, templateWindowSize,
|
||||||
|
h, hn, cn, normType,
|
||||||
|
almostTemplateWindowSizeSqBinShift)) ||
|
||||||
|
(depth == CV_16U &&
|
||||||
|
!ocl_calcAlmostDist2Weight<float, ushort, int64>(almostDist2Weight,
|
||||||
|
searchWindowSize, templateWindowSize,
|
||||||
|
h, hn, cn, normType,
|
||||||
|
almostTemplateWindowSizeSqBinShift)))
|
||||||
return false;
|
return false;
|
||||||
CV_Assert(almostTemplateWindowSizeSqBinShift >= 0);
|
CV_Assert(almostTemplateWindowSizeSqBinShift >= 0);
|
||||||
|
|
||||||
UMat srcex;
|
UMat srcex;
|
||||||
int borderSize = searchWindowHalfSize + templateWindowHalfWize;
|
int borderSize = searchWindowHalfSize + templateWindowHalfWize;
|
||||||
copyMakeBorder(_src, srcex, borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT);
|
if (cn == 3) {
|
||||||
|
srcex.create(size.height + 2*borderSize, size.width + 2*borderSize, CV_MAKE_TYPE(depth, 4));
|
||||||
|
UMat src(srcex, Rect(borderSize, borderSize, size.width, size.height));
|
||||||
|
int from_to[] = { 0,0, 1,1, 2,2 };
|
||||||
|
mixChannels(std::vector<UMat>(1, _src.getUMat()), std::vector<UMat>(1, src), from_to, 3);
|
||||||
|
copyMakeBorder(src, srcex, borderSize, borderSize, borderSize, borderSize,
|
||||||
|
BORDER_DEFAULT|BORDER_ISOLATED); // create borders in place
|
||||||
|
}
|
||||||
|
else
|
||||||
|
copyMakeBorder(_src, srcex, borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT);
|
||||||
|
|
||||||
_dst.create(size, type);
|
_dst.create(size, type);
|
||||||
UMat dst = _dst.getUMat();
|
UMat dst;
|
||||||
|
if (cn == 3)
|
||||||
|
dst.create(size, CV_MAKE_TYPE(depth, 4));
|
||||||
|
else
|
||||||
|
dst = _dst.getUMat();
|
||||||
|
|
||||||
int searchWindowSizeSq = searchWindowSize * searchWindowSize;
|
int searchWindowSizeSq = searchWindowSize * searchWindowSize;
|
||||||
Size upColSumSize(size.width, searchWindowSizeSq * nblocksy);
|
Size upColSumSize(size.width, searchWindowSizeSq * nblocksy);
|
||||||
@ -123,7 +173,14 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
|
|||||||
ocl::KernelArg::PtrReadOnly(buffer), almostTemplateWindowSizeSqBinShift);
|
ocl::KernelArg::PtrReadOnly(buffer), almostTemplateWindowSizeSqBinShift);
|
||||||
|
|
||||||
size_t globalsize[2] = { nblocksx * ctaSize, nblocksy }, localsize[2] = { ctaSize, 1 };
|
size_t globalsize[2] = { nblocksx * ctaSize, nblocksy }, localsize[2] = { ctaSize, 1 };
|
||||||
return k.run(2, globalsize, localsize, false);
|
if (!k.run(2, globalsize, localsize, false)) return false;
|
||||||
|
|
||||||
|
if (cn == 3) {
|
||||||
|
int from_to[] = { 0,0, 1,1, 2,2 };
|
||||||
|
mixChannels(std::vector<UMat>(1, dst), std::vector<UMat>(1, _dst.getUMat()), from_to, 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool ocl_fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
|
static bool ocl_fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
|
||||||
|
@ -50,14 +50,14 @@
|
|||||||
|
|
||||||
using namespace cv;
|
using namespace cv;
|
||||||
|
|
||||||
template <typename T>
|
template <typename T, typename IT, typename UIT, typename D, typename WT>
|
||||||
struct FastNlMeansMultiDenoisingInvoker :
|
struct FastNlMeansMultiDenoisingInvoker :
|
||||||
ParallelLoopBody
|
ParallelLoopBody
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
FastNlMeansMultiDenoisingInvoker(const std::vector<Mat>& srcImgs, int imgToDenoiseIndex,
|
FastNlMeansMultiDenoisingInvoker(const std::vector<Mat>& srcImgs, int imgToDenoiseIndex,
|
||||||
int temporalWindowSize, Mat& dst, int template_window_size,
|
int temporalWindowSize, Mat& dst, int template_window_size,
|
||||||
int search_window_size, const float h);
|
int search_window_size, const float *h);
|
||||||
|
|
||||||
void operator() (const Range& range) const;
|
void operator() (const Range& range) const;
|
||||||
|
|
||||||
@ -81,9 +81,9 @@ private:
|
|||||||
int search_window_half_size_;
|
int search_window_half_size_;
|
||||||
int temporal_window_half_size_;
|
int temporal_window_half_size_;
|
||||||
|
|
||||||
int fixed_point_mult_;
|
typename pixelInfo<WT>::sampleType fixed_point_mult_;
|
||||||
int almost_template_window_size_sq_bin_shift;
|
int almost_template_window_size_sq_bin_shift;
|
||||||
std::vector<int> almost_dist2weight;
|
std::vector<WT> almost_dist2weight;
|
||||||
|
|
||||||
void calcDistSumsForFirstElementInRow(int i, Array3d<int>& dist_sums,
|
void calcDistSumsForFirstElementInRow(int i, Array3d<int>& dist_sums,
|
||||||
Array4d<int>& col_dist_sums,
|
Array4d<int>& col_dist_sums,
|
||||||
@ -94,19 +94,19 @@ private:
|
|||||||
Array4d<int>& up_col_dist_sums) const;
|
Array4d<int>& up_col_dist_sums) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class T>
|
template <typename T, typename IT, typename UIT, typename D, typename WT>
|
||||||
FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
|
FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::FastNlMeansMultiDenoisingInvoker(
|
||||||
const std::vector<Mat>& srcImgs,
|
const std::vector<Mat>& srcImgs,
|
||||||
int imgToDenoiseIndex,
|
int imgToDenoiseIndex,
|
||||||
int temporalWindowSize,
|
int temporalWindowSize,
|
||||||
cv::Mat& dst,
|
cv::Mat& dst,
|
||||||
int template_window_size,
|
int template_window_size,
|
||||||
int search_window_size,
|
int search_window_size,
|
||||||
const float h) :
|
const float *h) :
|
||||||
dst_(dst), extended_srcs_(srcImgs.size())
|
dst_(dst), extended_srcs_(srcImgs.size())
|
||||||
{
|
{
|
||||||
CV_Assert(srcImgs.size() > 0);
|
CV_Assert(srcImgs.size() > 0);
|
||||||
CV_Assert(srcImgs[0].channels() == sizeof(T));
|
CV_Assert(srcImgs[0].channels() == pixelInfo<T>::channels);
|
||||||
|
|
||||||
rows_ = srcImgs[0].rows;
|
rows_ = srcImgs[0].rows;
|
||||||
cols_ = srcImgs[0].cols;
|
cols_ = srcImgs[0].cols;
|
||||||
@ -125,8 +125,10 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
|
|||||||
border_size_, border_size_, border_size_, border_size_, cv::BORDER_DEFAULT);
|
border_size_, border_size_, border_size_, border_size_, cv::BORDER_DEFAULT);
|
||||||
|
|
||||||
main_extended_src_ = extended_srcs_[temporal_window_half_size_];
|
main_extended_src_ = extended_srcs_[temporal_window_half_size_];
|
||||||
const int max_estimate_sum_value = temporal_window_size_ * search_window_size_ * search_window_size_ * 255;
|
const IT max_estimate_sum_value =
|
||||||
fixed_point_mult_ = std::numeric_limits<int>::max() / max_estimate_sum_value;
|
(IT)temporal_window_size_ * (IT)search_window_size_ * (IT)search_window_size_ * (IT)pixelInfo<T>::sampleMax();
|
||||||
|
fixed_point_mult_ = (int)std::min<IT>(std::numeric_limits<IT>::max() / max_estimate_sum_value,
|
||||||
|
pixelInfo<WT>::sampleMax());
|
||||||
|
|
||||||
// precalc weight for every possible l2 dist between blocks
|
// precalc weight for every possible l2 dist between blocks
|
||||||
// additional optimization of precalced weights to replace division(averaging) by binary shift
|
// additional optimization of precalced weights to replace division(averaging) by binary shift
|
||||||
@ -138,30 +140,24 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
|
|||||||
int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift;
|
int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift;
|
||||||
double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq;
|
double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq;
|
||||||
|
|
||||||
int max_dist = 255 * 255 * sizeof(T);
|
int max_dist = D::template maxDist<T>();
|
||||||
int almost_max_dist = (int) (max_dist / almost_dist2actual_dist_multiplier + 1);
|
int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1);
|
||||||
almost_dist2weight.resize(almost_max_dist);
|
almost_dist2weight.resize(almost_max_dist);
|
||||||
|
|
||||||
const double WEIGHT_THRESHOLD = 0.001;
|
|
||||||
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
|
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
|
||||||
{
|
{
|
||||||
double dist = almost_dist * almost_dist2actual_dist_multiplier;
|
double dist = almost_dist * almost_dist2actual_dist_multiplier;
|
||||||
int weight = cvRound(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T))));
|
almost_dist2weight[almost_dist] =
|
||||||
|
D::template calcWeight<T, WT>(dist, h, fixed_point_mult_);
|
||||||
if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
|
|
||||||
weight = 0;
|
|
||||||
|
|
||||||
almost_dist2weight[almost_dist] = weight;
|
|
||||||
}
|
}
|
||||||
CV_Assert(almost_dist2weight[0] == fixed_point_mult_);
|
|
||||||
|
|
||||||
// additional optimization init end
|
// additional optimization init end
|
||||||
if (dst_.empty())
|
if (dst_.empty())
|
||||||
dst_ = Mat::zeros(srcImgs[0].size(), srcImgs[0].type());
|
dst_ = Mat::zeros(srcImgs[0].size(), srcImgs[0].type());
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <typename T, typename IT, typename UIT, typename D, typename WT>
|
||||||
void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
|
void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::operator() (const Range& range) const
|
||||||
{
|
{
|
||||||
int row_from = range.start;
|
int row_from = range.start;
|
||||||
int row_to = range.end - 1;
|
int row_to = range.end - 1;
|
||||||
@ -234,7 +230,7 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
|
|||||||
dist_sums_row[x] -= col_dist_sums_row[x];
|
dist_sums_row[x] -= col_dist_sums_row[x];
|
||||||
|
|
||||||
col_dist_sums_row[x] = up_col_dist_sums_row[x] +
|
col_dist_sums_row[x] = up_col_dist_sums_row[x] +
|
||||||
calcUpDownDist(a_up, a_down, b_up_ptr[start_bx + x], b_down_ptr[start_bx + x]);
|
D::template calcUpDownDist<T>(a_up, a_down, b_up_ptr[start_bx + x], b_down_ptr[start_bx + x]);
|
||||||
|
|
||||||
dist_sums_row[x] += col_dist_sums_row[x];
|
dist_sums_row[x] += col_dist_sums_row[x];
|
||||||
up_col_dist_sums_row[x] = col_dist_sums_row[x];
|
up_col_dist_sums_row[x] = col_dist_sums_row[x];
|
||||||
@ -247,11 +243,11 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
|
|||||||
}
|
}
|
||||||
|
|
||||||
// calc weights
|
// calc weights
|
||||||
int weights_sum = 0;
|
IT estimation[pixelInfo<T>::channels], weights_sum[pixelInfo<WT>::channels];
|
||||||
|
for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
|
||||||
int estimation[3];
|
|
||||||
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
|
|
||||||
estimation[channel_num] = 0;
|
estimation[channel_num] = 0;
|
||||||
|
for (size_t channel_num = 0; channel_num < pixelInfo<WT>::channels; channel_num++)
|
||||||
|
weights_sum[channel_num] = 0;
|
||||||
|
|
||||||
for (int d = 0; d < temporal_window_size_; d++)
|
for (int d = 0; d < temporal_window_size_; d++)
|
||||||
{
|
{
|
||||||
@ -266,26 +262,22 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
|
|||||||
{
|
{
|
||||||
int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift;
|
int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift;
|
||||||
|
|
||||||
int weight = almost_dist2weight[almostAvgDist];
|
WT weight = almost_dist2weight[almostAvgDist];
|
||||||
weights_sum += weight;
|
|
||||||
|
|
||||||
T p = cur_row_ptr[border_size_ + search_window_x + x];
|
T p = cur_row_ptr[border_size_ + search_window_x + x];
|
||||||
incWithWeight(estimation, weight, p);
|
incWithWeight<T, IT, WT>(estimation, weights_sum, weight, p);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
|
divByWeightsSum<IT, UIT, pixelInfo<T>::channels, pixelInfo<WT>::channels>(estimation,
|
||||||
estimation[channel_num] = ((unsigned)estimation[channel_num] + weights_sum / 2) / weights_sum;
|
weights_sum);
|
||||||
|
dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
|
||||||
dst_.at<T>(i,j) = saturateCastFromArray<T>(estimation);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <typename T, typename IT, typename UIT, typename D, typename WT>
|
||||||
inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
|
inline void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForFirstElementInRow(
|
||||||
int i, Array3d<int>& dist_sums, Array4d<int>& col_dist_sums, Array4d<int>& up_col_dist_sums) const
|
int i, Array3d<int>& dist_sums, Array4d<int>& col_dist_sums, Array4d<int>& up_col_dist_sums) const
|
||||||
{
|
{
|
||||||
int j = 0;
|
int j = 0;
|
||||||
@ -310,7 +302,7 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRo
|
|||||||
{
|
{
|
||||||
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
|
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
|
||||||
{
|
{
|
||||||
int dist = calcDist<T>(
|
int dist = D::template calcDist<T>(
|
||||||
main_extended_src_.at<T>(border_size_ + i + ty, border_size_ + j + tx),
|
main_extended_src_.at<T>(border_size_ + i + ty, border_size_ + j + tx),
|
||||||
cur_extended_src.at<T>(border_size_ + start_y + ty, border_size_ + start_x + tx));
|
cur_extended_src.at<T>(border_size_ + start_y + ty, border_size_ + start_x + tx));
|
||||||
|
|
||||||
@ -325,8 +317,8 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRo
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <typename T, typename IT, typename UIT, typename D, typename WT>
|
||||||
inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForElementInFirstRow(
|
inline void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForElementInFirstRow(
|
||||||
int i, int j, int first_col_num, Array3d<int>& dist_sums,
|
int i, int j, int first_col_num, Array3d<int>& dist_sums,
|
||||||
Array4d<int>& col_dist_sums, Array4d<int>& up_col_dist_sums) const
|
Array4d<int>& col_dist_sums, Array4d<int>& up_col_dist_sums) const
|
||||||
{
|
{
|
||||||
@ -353,7 +345,7 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForElementInFirstRo
|
|||||||
int* col_dist_sums_ptr = &col_dist_sums[new_last_col_num][d][y][x];
|
int* col_dist_sums_ptr = &col_dist_sums[new_last_col_num][d][y][x];
|
||||||
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
|
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
|
||||||
{
|
{
|
||||||
*col_dist_sums_ptr += calcDist<T>(
|
*col_dist_sums_ptr += D::template calcDist<T>(
|
||||||
main_extended_src_.at<T>(ay + ty, ax),
|
main_extended_src_.at<T>(ay + ty, ax),
|
||||||
cur_extended_src.at<T>(by + ty, bx));
|
cur_extended_src.at<T>(by + ty, bx));
|
||||||
}
|
}
|
||||||
|
@ -20,21 +20,23 @@
|
|||||||
|
|
||||||
#ifdef OP_CALC_WEIGHTS
|
#ifdef OP_CALC_WEIGHTS
|
||||||
|
|
||||||
__kernel void calcAlmostDist2Weight(__global int * almostDist2Weight, int almostMaxDist,
|
__kernel void calcAlmostDist2Weight(__global wlut_t * almostDist2Weight, int almostMaxDist,
|
||||||
FT almostDist2ActualDistMultiplier, int fixedPointMult,
|
FT almostDist2ActualDistMultiplier, int fixedPointMult,
|
||||||
FT den, FT WEIGHT_THRESHOLD)
|
w_t den, FT WEIGHT_THRESHOLD)
|
||||||
{
|
{
|
||||||
int almostDist = get_global_id(0);
|
int almostDist = get_global_id(0);
|
||||||
|
|
||||||
if (almostDist < almostMaxDist)
|
if (almostDist < almostMaxDist)
|
||||||
{
|
{
|
||||||
FT dist = almostDist * almostDist2ActualDistMultiplier;
|
FT dist = almostDist * almostDist2ActualDistMultiplier;
|
||||||
int weight = convert_int_sat_rte(fixedPointMult * exp(-dist * den));
|
#ifdef ABS
|
||||||
|
w_t w = exp((w_t)(-dist*dist) * den);
|
||||||
if (weight < WEIGHT_THRESHOLD * fixedPointMult)
|
#else
|
||||||
weight = 0;
|
w_t w = exp((w_t)(-dist) * den);
|
||||||
|
#endif
|
||||||
almostDist2Weight[almostDist] = weight;
|
wlut_t weight = convert_wlut_t(fixedPointMult * (isnan(w) ? (w_t)1.0 : w));
|
||||||
|
almostDist2Weight[almostDist] =
|
||||||
|
weight < (wlut_t)(WEIGHT_THRESHOLD * fixedPointMult) ? (wlut_t)0 : weight;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -44,21 +46,35 @@ __kernel void calcAlmostDist2Weight(__global int * almostDist2Weight, int almost
|
|||||||
|
|
||||||
#define SEARCH_SIZE_SQ (SEARCH_SIZE * SEARCH_SIZE)
|
#define SEARCH_SIZE_SQ (SEARCH_SIZE * SEARCH_SIZE)
|
||||||
|
|
||||||
inline int calcDist(uchar_t a, uchar_t b)
|
inline int calcDist(pixel_t a, pixel_t b)
|
||||||
{
|
{
|
||||||
|
#ifdef ABS
|
||||||
|
int_t retval = convert_int_t(abs_diff(a, b));
|
||||||
|
#else
|
||||||
int_t diff = convert_int_t(a) - convert_int_t(b);
|
int_t diff = convert_int_t(a) - convert_int_t(b);
|
||||||
int_t retval = diff * diff;
|
int_t retval = diff * diff;
|
||||||
|
#endif
|
||||||
|
|
||||||
#if cn == 1
|
#if cn == 1
|
||||||
return retval;
|
return retval;
|
||||||
#elif cn == 2
|
#elif cn == 2
|
||||||
return retval.x + retval.y;
|
return retval.x + retval.y;
|
||||||
|
#elif cn == 3
|
||||||
|
return retval.x + retval.y + retval.z;
|
||||||
|
#elif cn == 4
|
||||||
|
return retval.x + retval.y + retval.z + retval.w;
|
||||||
#else
|
#else
|
||||||
#error "cn should be either 1 or 2"
|
#error "cn should be either 1, 2, 3 or 4"
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int calcDistUpDown(uchar_t down_value, uchar_t down_value_t, uchar_t up_value, uchar_t up_value_t)
|
#ifdef ABS
|
||||||
|
inline int calcDistUpDown(pixel_t down_value, pixel_t down_value_t, pixel_t up_value, pixel_t up_value_t)
|
||||||
|
{
|
||||||
|
return calcDist(down_value, down_value_t) - calcDist(up_value, up_value_t);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
inline int calcDistUpDown(pixel_t down_value, pixel_t down_value_t, pixel_t up_value, pixel_t up_value_t)
|
||||||
{
|
{
|
||||||
int_t A = convert_int_t(down_value) - convert_int_t(down_value_t);
|
int_t A = convert_int_t(down_value) - convert_int_t(down_value_t);
|
||||||
int_t B = convert_int_t(up_value) - convert_int_t(up_value_t);
|
int_t B = convert_int_t(up_value) - convert_int_t(up_value_t);
|
||||||
@ -68,10 +84,15 @@ inline int calcDistUpDown(uchar_t down_value, uchar_t down_value_t, uchar_t up_v
|
|||||||
return retval;
|
return retval;
|
||||||
#elif cn == 2
|
#elif cn == 2
|
||||||
return retval.x + retval.y;
|
return retval.x + retval.y;
|
||||||
|
#elif cn == 3
|
||||||
|
return retval.x + retval.y + retval.z;
|
||||||
|
#elif cn == 4
|
||||||
|
return retval.x + retval.y + retval.z + retval.w;
|
||||||
#else
|
#else
|
||||||
#error "cn should be either 1 or 2"
|
#error "cn should be either 1, 2, 3 or 4"
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#define COND if (x == 0 && y == 0)
|
#define COND if (x == 0 && y == 0)
|
||||||
|
|
||||||
@ -87,9 +108,9 @@ inline void calcFirstElementInRow(__global const uchar * src, int src_step, int
|
|||||||
{
|
{
|
||||||
int dist = 0, value;
|
int dist = 0, value;
|
||||||
|
|
||||||
__global const uchar_t * src_template = (__global const uchar_t *)(src +
|
__global const pixel_t * src_template = (__global const pixel_t *)(src +
|
||||||
mad24(sy + i / SEARCH_SIZE, src_step, mad24(cn, sx + i % SEARCH_SIZE, src_offset)));
|
mad24(sy + i / SEARCH_SIZE, src_step, mad24(psz, sx + i % SEARCH_SIZE, src_offset)));
|
||||||
__global const uchar_t * src_current = (__global const uchar_t *)(src + mad24(y, src_step, mad24(cn, x, src_offset)));
|
__global const pixel_t * src_current = (__global const pixel_t *)(src + mad24(y, src_step, mad24(psz, x, src_offset)));
|
||||||
__global int * col_dists_current = col_dists + i * TEMPLATE_SIZE;
|
__global int * col_dists_current = col_dists + i * TEMPLATE_SIZE;
|
||||||
|
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
@ -107,8 +128,8 @@ inline void calcFirstElementInRow(__global const uchar * src, int src_step, int
|
|||||||
dist += value;
|
dist += value;
|
||||||
}
|
}
|
||||||
|
|
||||||
src_current = (__global const uchar_t *)((__global const uchar *)src_current + src_step);
|
src_current = (__global const pixel_t *)((__global const uchar *)src_current + src_step);
|
||||||
src_template = (__global const uchar_t *)((__global const uchar *)src_template + src_step);
|
src_template = (__global const pixel_t *)((__global const uchar *)src_template + src_step);
|
||||||
}
|
}
|
||||||
|
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
@ -130,9 +151,9 @@ inline void calcElementInFirstRow(__global const uchar * src, int src_step, int
|
|||||||
|
|
||||||
for (int i = id; i < SEARCH_SIZE_SQ; i += CTA_SIZE)
|
for (int i = id; i < SEARCH_SIZE_SQ; i += CTA_SIZE)
|
||||||
{
|
{
|
||||||
__global const uchar_t * src_current = (__global const uchar_t *)(src + mad24(y, src_step, mad24(cn, x, src_offset)));
|
__global const pixel_t * src_current = (__global const pixel_t *)(src + mad24(y, src_step, mad24(psz, x, src_offset)));
|
||||||
__global const uchar_t * src_template = (__global const uchar_t *)(src +
|
__global const pixel_t * src_template = (__global const pixel_t *)(src +
|
||||||
mad24(sy + i / SEARCH_SIZE, src_step, mad24(cn, sx + i % SEARCH_SIZE, src_offset)));
|
mad24(sy + i / SEARCH_SIZE, src_step, mad24(psz, sx + i % SEARCH_SIZE, src_offset)));
|
||||||
__global int * col_dists_current = col_dists + TEMPLATE_SIZE * i;
|
__global int * col_dists_current = col_dists + TEMPLATE_SIZE * i;
|
||||||
|
|
||||||
int col_dist = 0;
|
int col_dist = 0;
|
||||||
@ -142,8 +163,8 @@ inline void calcElementInFirstRow(__global const uchar * src, int src_step, int
|
|||||||
{
|
{
|
||||||
col_dist += calcDist(src_current[0], src_template[0]);
|
col_dist += calcDist(src_current[0], src_template[0]);
|
||||||
|
|
||||||
src_current = (__global const uchar_t *)((__global const uchar *)src_current + src_step);
|
src_current = (__global const pixel_t *)((__global const uchar *)src_current + src_step);
|
||||||
src_template = (__global const uchar_t *)((__global const uchar *)src_template + src_step);
|
src_template = (__global const pixel_t *)((__global const uchar *)src_template + src_step);
|
||||||
}
|
}
|
||||||
|
|
||||||
dists[i] += col_dist - col_dists_current[first];
|
dists[i] += col_dist - col_dists_current[first];
|
||||||
@ -160,8 +181,8 @@ inline void calcElement(__global const uchar * src, int src_step, int src_offset
|
|||||||
int sy_up = y - TEMPLATE_SIZE2 - 1;
|
int sy_up = y - TEMPLATE_SIZE2 - 1;
|
||||||
int sy_down = y + TEMPLATE_SIZE2;
|
int sy_down = y + TEMPLATE_SIZE2;
|
||||||
|
|
||||||
uchar_t up_value = *(__global const uchar_t *)(src + mad24(sy_up, src_step, mad24(cn, sx, src_offset)));
|
pixel_t up_value = *(__global const pixel_t *)(src + mad24(sy_up, src_step, mad24(psz, sx, src_offset)));
|
||||||
uchar_t down_value = *(__global const uchar_t *)(src + mad24(sy_down, src_step, mad24(cn, sx, src_offset)));
|
pixel_t down_value = *(__global const pixel_t *)(src + mad24(sy_down, src_step, mad24(psz, sx, src_offset)));
|
||||||
|
|
||||||
sx -= SEARCH_SIZE2;
|
sx -= SEARCH_SIZE2;
|
||||||
sy_up -= SEARCH_SIZE2;
|
sy_up -= SEARCH_SIZE2;
|
||||||
@ -171,8 +192,8 @@ inline void calcElement(__global const uchar * src, int src_step, int src_offset
|
|||||||
{
|
{
|
||||||
int wx = i % SEARCH_SIZE, wy = i / SEARCH_SIZE;
|
int wx = i % SEARCH_SIZE, wy = i / SEARCH_SIZE;
|
||||||
|
|
||||||
uchar_t up_value_t = *(__global const uchar_t *)(src + mad24(sy_up + wy, src_step, mad24(cn, sx + wx, src_offset)));
|
pixel_t up_value_t = *(__global const pixel_t *)(src + mad24(sy_up + wy, src_step, mad24(psz, sx + wx, src_offset)));
|
||||||
uchar_t down_value_t = *(__global const uchar_t *)(src + mad24(sy_down + wy, src_step, mad24(cn, sx + wx, src_offset)));
|
pixel_t down_value_t = *(__global const pixel_t *)(src + mad24(sy_down + wy, src_step, mad24(psz, sx + wx, src_offset)));
|
||||||
|
|
||||||
__global int * col_dists_current = col_dists + mad24(i, TEMPLATE_SIZE, first);
|
__global int * col_dists_current = col_dists + mad24(i, TEMPLATE_SIZE, first);
|
||||||
__global int * up_col_dists_current = up_col_dists + mad24(x0, SEARCH_SIZE_SQ, i);
|
__global int * up_col_dists_current = up_col_dists + mad24(x0, SEARCH_SIZE_SQ, i);
|
||||||
@ -186,24 +207,25 @@ inline void calcElement(__global const uchar * src, int src_step, int src_offset
|
|||||||
}
|
}
|
||||||
|
|
||||||
inline void convolveWindow(__global const uchar * src, int src_step, int src_offset,
|
inline void convolveWindow(__global const uchar * src, int src_step, int src_offset,
|
||||||
__local int * dists, __global const int * almostDist2Weight,
|
__local int * dists, __global const wlut_t * almostDist2Weight,
|
||||||
__global uchar * dst, int dst_step, int dst_offset,
|
__global uchar * dst, int dst_step, int dst_offset,
|
||||||
int y, int x, int id, __local int * weights_local,
|
int y, int x, int id, __local weight_t * weights_local,
|
||||||
__local int_t * weighted_sum_local, int almostTemplateWindowSizeSqBinShift)
|
__local sum_t * weighted_sum_local, int almostTemplateWindowSizeSqBinShift)
|
||||||
{
|
{
|
||||||
int sx = x - SEARCH_SIZE2, sy = y - SEARCH_SIZE2, weights = 0;
|
int sx = x - SEARCH_SIZE2, sy = y - SEARCH_SIZE2;
|
||||||
int_t weighted_sum = (int_t)(0);
|
weight_t weights = (weight_t)0;
|
||||||
|
sum_t weighted_sum = (sum_t)0;
|
||||||
|
|
||||||
for (int i = id; i < SEARCH_SIZE_SQ; i += CTA_SIZE)
|
for (int i = id; i < SEARCH_SIZE_SQ; i += CTA_SIZE)
|
||||||
{
|
{
|
||||||
int src_index = mad24(sy + i / SEARCH_SIZE, src_step, mad24(i % SEARCH_SIZE + sx, cn, src_offset));
|
int src_index = mad24(sy + i / SEARCH_SIZE, src_step, mad24(i % SEARCH_SIZE + sx, psz, src_offset));
|
||||||
int_t src_value = convert_int_t(*(__global const uchar_t *)(src + src_index));
|
sum_t src_value = convert_sum_t(*(__global const pixel_t *)(src + src_index));
|
||||||
|
|
||||||
int almostAvgDist = dists[i] >> almostTemplateWindowSizeSqBinShift;
|
int almostAvgDist = dists[i] >> almostTemplateWindowSizeSqBinShift;
|
||||||
int weight = almostDist2Weight[almostAvgDist];
|
weight_t weight = convert_weight_t(almostDist2Weight[almostAvgDist]);
|
||||||
|
|
||||||
weights += weight;
|
weights += weight;
|
||||||
weighted_sum += (int_t)(weight) * src_value;
|
weighted_sum += (sum_t)weight * src_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
weights_local[id] = weights;
|
weights_local[id] = weights;
|
||||||
@ -223,26 +245,27 @@ inline void convolveWindow(__global const uchar * src, int src_step, int src_off
|
|||||||
|
|
||||||
if (id == 0)
|
if (id == 0)
|
||||||
{
|
{
|
||||||
int dst_index = mad24(y, dst_step, mad24(cn, x, dst_offset));
|
int dst_index = mad24(y, dst_step, mad24(psz, x, dst_offset));
|
||||||
int_t weighted_sum_local_0 = weighted_sum_local[0] + weighted_sum_local[1] +
|
sum_t weighted_sum_local_0 = weighted_sum_local[0] + weighted_sum_local[1] +
|
||||||
weighted_sum_local[2] + weighted_sum_local[3];
|
weighted_sum_local[2] + weighted_sum_local[3];
|
||||||
int weights_local_0 = weights_local[0] + weights_local[1] + weights_local[2] + weights_local[3];
|
weight_t weights_local_0 = weights_local[0] + weights_local[1] + weights_local[2] + weights_local[3];
|
||||||
|
|
||||||
*(__global uchar_t *)(dst + dst_index) = convert_uchar_t(weighted_sum_local_0 / (int_t)(weights_local_0));
|
*(__global pixel_t *)(dst + dst_index) = convert_pixel_t(weighted_sum_local_0 / (sum_t)weights_local_0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__kernel void fastNlMeansDenoising(__global const uchar * src, int src_step, int src_offset,
|
__kernel void fastNlMeansDenoising(__global const uchar * src, int src_step, int src_offset,
|
||||||
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
|
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
|
||||||
__global const int * almostDist2Weight, __global uchar * buffer,
|
__global const wlut_t * almostDist2Weight, __global uchar * buffer,
|
||||||
int almostTemplateWindowSizeSqBinShift)
|
int almostTemplateWindowSizeSqBinShift)
|
||||||
{
|
{
|
||||||
int block_x = get_group_id(0), nblocks_x = get_num_groups(0);
|
int block_x = get_group_id(0), nblocks_x = get_num_groups(0);
|
||||||
int block_y = get_group_id(1);
|
int block_y = get_group_id(1);
|
||||||
int id = get_local_id(0), first;
|
int id = get_local_id(0), first;
|
||||||
|
|
||||||
__local int dists[SEARCH_SIZE_SQ], weights[CTA_SIZE];
|
__local int dists[SEARCH_SIZE_SQ];
|
||||||
__local int_t weighted_sum[CTA_SIZE];
|
__local weight_t weights[CTA_SIZE];
|
||||||
|
__local sum_t weighted_sum[CTA_SIZE];
|
||||||
|
|
||||||
int x0 = block_x * BLOCK_COLS, x1 = min(x0 + BLOCK_COLS, dst_cols);
|
int x0 = block_x * BLOCK_COLS, x1 = min(x0 + BLOCK_COLS, dst_cols);
|
||||||
int y0 = block_y * BLOCK_ROWS, y1 = min(y0 + BLOCK_ROWS, dst_rows);
|
int y0 = block_y * BLOCK_ROWS, y1 = min(y0 + BLOCK_ROWS, dst_rows);
|
||||||
|
@ -13,11 +13,11 @@
|
|||||||
namespace cvtest {
|
namespace cvtest {
|
||||||
namespace ocl {
|
namespace ocl {
|
||||||
|
|
||||||
PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool)
|
PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, int, bool, bool)
|
||||||
{
|
{
|
||||||
int cn, templateWindowSize, searchWindowSize;
|
int cn, normType, templateWindowSize, searchWindowSize;
|
||||||
float h;
|
std::vector<float> h;
|
||||||
bool use_roi;
|
bool use_roi, use_image;
|
||||||
|
|
||||||
TEST_DECLARE_INPUT_PARAMETER(src);
|
TEST_DECLARE_INPUT_PARAMETER(src);
|
||||||
TEST_DECLARE_OUTPUT_PARAMETER(dst);
|
TEST_DECLARE_OUTPUT_PARAMETER(dst);
|
||||||
@ -25,29 +25,46 @@ PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool)
|
|||||||
virtual void SetUp()
|
virtual void SetUp()
|
||||||
{
|
{
|
||||||
cn = GET_PARAM(0);
|
cn = GET_PARAM(0);
|
||||||
use_roi = GET_PARAM(1);
|
normType = GET_PARAM(1);
|
||||||
|
use_roi = GET_PARAM(2);
|
||||||
|
use_image = GET_PARAM(3);
|
||||||
|
|
||||||
templateWindowSize = 7;
|
templateWindowSize = 7;
|
||||||
searchWindowSize = 21;
|
searchWindowSize = 21;
|
||||||
h = 3.0f;
|
|
||||||
|
h.resize(cn);
|
||||||
|
for (int i=0; i<cn; i++)
|
||||||
|
h[i] = 3.0f + 0.5f*i;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void generateTestData()
|
virtual void generateTestData()
|
||||||
{
|
{
|
||||||
|
const int type = CV_8UC(cn);
|
||||||
Mat image;
|
Mat image;
|
||||||
if (cn == 1)
|
|
||||||
{
|
if (use_image) {
|
||||||
image = readImage("denoising/lena_noised_gaussian_sigma=10.png", IMREAD_GRAYSCALE);
|
image = readImage("denoising/lena_noised_gaussian_sigma=10.png",
|
||||||
|
cn == 1 ? IMREAD_GRAYSCALE : IMREAD_COLOR);
|
||||||
ASSERT_FALSE(image.empty());
|
ASSERT_FALSE(image.empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
const int type = CV_8UC(cn);
|
Size roiSize = use_image ? image.size() : randomSize(1, MAX_VALUE);
|
||||||
|
|
||||||
Size roiSize = cn == 1 ? image.size() : randomSize(1, MAX_VALUE);
|
|
||||||
Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
||||||
randomSubMat(src, src_roi, roiSize, srcBorder, type, 0, 255);
|
randomSubMat(src, src_roi, roiSize, srcBorder, type, 0, 255);
|
||||||
if (cn == 1)
|
if (use_image) {
|
||||||
image.copyTo(src_roi);
|
ASSERT_TRUE(cn > 0 && cn <= 4);
|
||||||
|
if (cn == 2) {
|
||||||
|
int from_to[] = { 0,0, 1,1 };
|
||||||
|
src_roi.create(roiSize, type);
|
||||||
|
mixChannels(&image, 1, &src_roi, 1, from_to, 2);
|
||||||
|
}
|
||||||
|
else if (cn == 4) {
|
||||||
|
int from_to[] = { 0,0, 1,1, 2,2, 1,3};
|
||||||
|
src_roi.create(roiSize, type);
|
||||||
|
mixChannels(&image, 1, &src_roi, 1, from_to, 4);
|
||||||
|
}
|
||||||
|
else image.copyTo(src_roi);
|
||||||
|
}
|
||||||
|
|
||||||
Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
||||||
randomSubMat(dst, dst_roi, roiSize, dstBorder, type, 0, 255);
|
randomSubMat(dst, dst_roi, roiSize, dstBorder, type, 0, 255);
|
||||||
@ -65,8 +82,23 @@ OCL_TEST_P(FastNlMeansDenoising, Mat)
|
|||||||
{
|
{
|
||||||
generateTestData();
|
generateTestData();
|
||||||
|
|
||||||
OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, h, templateWindowSize, searchWindowSize));
|
OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, std::vector<float>(1, h[0]), templateWindowSize, searchWindowSize, normType));
|
||||||
OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, h, templateWindowSize, searchWindowSize));
|
OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, std::vector<float>(1, h[0]), templateWindowSize, searchWindowSize, normType));
|
||||||
|
|
||||||
|
OCL_EXPECT_MATS_NEAR(dst, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef FastNlMeansDenoisingTestBase FastNlMeansDenoising_hsep;
|
||||||
|
|
||||||
|
OCL_TEST_P(FastNlMeansDenoising_hsep, Mat)
|
||||||
|
{
|
||||||
|
for (int j = 0; j < test_loop_times; j++)
|
||||||
|
{
|
||||||
|
generateTestData();
|
||||||
|
|
||||||
|
OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, h, templateWindowSize, searchWindowSize, normType));
|
||||||
|
OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, h, templateWindowSize, searchWindowSize, normType));
|
||||||
|
|
||||||
OCL_EXPECT_MATS_NEAR(dst, 1);
|
OCL_EXPECT_MATS_NEAR(dst, 1);
|
||||||
}
|
}
|
||||||
@ -80,15 +112,21 @@ OCL_TEST_P(FastNlMeansDenoisingColored, Mat)
|
|||||||
{
|
{
|
||||||
generateTestData();
|
generateTestData();
|
||||||
|
|
||||||
OCL_OFF(cv::fastNlMeansDenoisingColored(src_roi, dst_roi, h, h, templateWindowSize, searchWindowSize));
|
OCL_OFF(cv::fastNlMeansDenoisingColored(src_roi, dst_roi, h[0], h[0], templateWindowSize, searchWindowSize));
|
||||||
OCL_ON(cv::fastNlMeansDenoisingColored(usrc_roi, udst_roi, h, h, templateWindowSize, searchWindowSize));
|
OCL_ON(cv::fastNlMeansDenoisingColored(usrc_roi, udst_roi, h[0], h[0], templateWindowSize, searchWindowSize));
|
||||||
|
|
||||||
OCL_EXPECT_MATS_NEAR(dst, 1);
|
OCL_EXPECT_MATS_NEAR(dst, 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising, Combine(Values(1, 2), Bool()));
|
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising,
|
||||||
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoisingColored, Combine(Values(3, 4), Bool()));
|
Combine(Values(1, 2, 3, 4), Values((int)NORM_L2, (int)NORM_L1),
|
||||||
|
Bool(), Values(true)));
|
||||||
|
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising_hsep,
|
||||||
|
Combine(Values(1, 2, 3, 4), Values((int)NORM_L2, (int)NORM_L1),
|
||||||
|
Bool(), Values(true)));
|
||||||
|
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoisingColored,
|
||||||
|
Combine(Values(3, 4), Values((int)NORM_L2), Bool(), Values(false)));
|
||||||
|
|
||||||
} } // namespace cvtest::ocl
|
} } // namespace cvtest::ocl
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user