Significantly improved parallel non-local means denoising by passing a granularity parameter to the parallel_for_ loop. Because the algorithm relies on sliding sums, it is essential that each thread has enough work to do; otherwise the algorithm's theoretical complexity increases and, at best, there is no speedup compared to single-threaded code.
This commit is contained in:
@@ -50,42 +50,50 @@ static void fastNlMeansDenoising_( const Mat& src, Mat& dst, const std::vector<f
|
|||||||
int templateWindowSize, int searchWindowSize)
|
int templateWindowSize, int searchWindowSize)
|
||||||
{
|
{
|
||||||
int hn = (int)h.size();
|
int hn = (int)h.size();
|
||||||
|
double granularity = (double)std::max(1., (double)dst.total()/(1 << 17));
|
||||||
|
|
||||||
switch (CV_MAT_CN(src.type())) {
|
switch (CV_MAT_CN(src.type())) {
|
||||||
case 1:
|
case 1:
|
||||||
parallel_for_(cv::Range(0, src.rows),
|
parallel_for_(cv::Range(0, src.rows),
|
||||||
FastNlMeansDenoisingInvoker<ST, IT, UIT, D, int>(
|
FastNlMeansDenoisingInvoker<ST, IT, UIT, D, int>(
|
||||||
src, dst, templateWindowSize, searchWindowSize, &h[0]));
|
src, dst, templateWindowSize, searchWindowSize, &h[0]),
|
||||||
|
granularity);
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
if (hn == 1)
|
if (hn == 1)
|
||||||
parallel_for_(cv::Range(0, src.rows),
|
parallel_for_(cv::Range(0, src.rows),
|
||||||
FastNlMeansDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, int>(
|
FastNlMeansDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, int>(
|
||||||
src, dst, templateWindowSize, searchWindowSize, &h[0]));
|
src, dst, templateWindowSize, searchWindowSize, &h[0]),
|
||||||
|
granularity);
|
||||||
else
|
else
|
||||||
parallel_for_(cv::Range(0, src.rows),
|
parallel_for_(cv::Range(0, src.rows),
|
||||||
FastNlMeansDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, Vec2i>(
|
FastNlMeansDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, Vec2i>(
|
||||||
src, dst, templateWindowSize, searchWindowSize, &h[0]));
|
src, dst, templateWindowSize, searchWindowSize, &h[0]),
|
||||||
|
granularity);
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
if (hn == 1)
|
if (hn == 1)
|
||||||
parallel_for_(cv::Range(0, src.rows),
|
parallel_for_(cv::Range(0, src.rows),
|
||||||
FastNlMeansDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, int>(
|
FastNlMeansDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, int>(
|
||||||
src, dst, templateWindowSize, searchWindowSize, &h[0]));
|
src, dst, templateWindowSize, searchWindowSize, &h[0]),
|
||||||
|
granularity);
|
||||||
else
|
else
|
||||||
parallel_for_(cv::Range(0, src.rows),
|
parallel_for_(cv::Range(0, src.rows),
|
||||||
FastNlMeansDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, Vec3i>(
|
FastNlMeansDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, Vec3i>(
|
||||||
src, dst, templateWindowSize, searchWindowSize, &h[0]));
|
src, dst, templateWindowSize, searchWindowSize, &h[0]),
|
||||||
|
granularity);
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
if (hn == 1)
|
if (hn == 1)
|
||||||
parallel_for_(cv::Range(0, src.rows),
|
parallel_for_(cv::Range(0, src.rows),
|
||||||
FastNlMeansDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, int>(
|
FastNlMeansDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, int>(
|
||||||
src, dst, templateWindowSize, searchWindowSize, &h[0]));
|
src, dst, templateWindowSize, searchWindowSize, &h[0]),
|
||||||
|
granularity);
|
||||||
else
|
else
|
||||||
parallel_for_(cv::Range(0, src.rows),
|
parallel_for_(cv::Range(0, src.rows),
|
||||||
FastNlMeansDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, Vec4i>(
|
FastNlMeansDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, Vec4i>(
|
||||||
src, dst, templateWindowSize, searchWindowSize, &h[0]));
|
src, dst, templateWindowSize, searchWindowSize, &h[0]),
|
||||||
|
granularity);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
CV_Error(Error::StsBadArg,
|
CV_Error(Error::StsBadArg,
|
||||||
@@ -237,6 +245,7 @@ static void fastNlMeansDenoisingMulti_( const std::vector<Mat>& srcImgs, Mat& ds
|
|||||||
int templateWindowSize, int searchWindowSize)
|
int templateWindowSize, int searchWindowSize)
|
||||||
{
|
{
|
||||||
int hn = (int)h.size();
|
int hn = (int)h.size();
|
||||||
|
double granularity = (double)std::max(1., (double)dst.total()/(1 << 16));
|
||||||
|
|
||||||
switch (srcImgs[0].type())
|
switch (srcImgs[0].type())
|
||||||
{
|
{
|
||||||
@@ -244,43 +253,50 @@ static void fastNlMeansDenoisingMulti_( const std::vector<Mat>& srcImgs, Mat& ds
|
|||||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||||
FastNlMeansMultiDenoisingInvoker<uchar, IT, UIT, D, int>(
|
FastNlMeansMultiDenoisingInvoker<uchar, IT, UIT, D, int>(
|
||||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||||
dst, templateWindowSize, searchWindowSize, &h[0]));
|
dst, templateWindowSize, searchWindowSize, &h[0]),
|
||||||
|
granularity);
|
||||||
break;
|
break;
|
||||||
case CV_8UC2:
|
case CV_8UC2:
|
||||||
if (hn == 1)
|
if (hn == 1)
|
||||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||||
FastNlMeansMultiDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, int>(
|
FastNlMeansMultiDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, int>(
|
||||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||||
dst, templateWindowSize, searchWindowSize, &h[0]));
|
dst, templateWindowSize, searchWindowSize, &h[0]),
|
||||||
|
granularity);
|
||||||
else
|
else
|
||||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||||
FastNlMeansMultiDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, Vec2i>(
|
FastNlMeansMultiDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, Vec2i>(
|
||||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||||
dst, templateWindowSize, searchWindowSize, &h[0]));
|
dst, templateWindowSize, searchWindowSize, &h[0]),
|
||||||
|
granularity);
|
||||||
break;
|
break;
|
||||||
case CV_8UC3:
|
case CV_8UC3:
|
||||||
if (hn == 1)
|
if (hn == 1)
|
||||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||||
FastNlMeansMultiDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, int>(
|
FastNlMeansMultiDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, int>(
|
||||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||||
dst, templateWindowSize, searchWindowSize, &h[0]));
|
dst, templateWindowSize, searchWindowSize, &h[0]),
|
||||||
|
granularity);
|
||||||
else
|
else
|
||||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||||
FastNlMeansMultiDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, Vec3i>(
|
FastNlMeansMultiDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, Vec3i>(
|
||||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||||
dst, templateWindowSize, searchWindowSize, &h[0]));
|
dst, templateWindowSize, searchWindowSize, &h[0]),
|
||||||
|
granularity);
|
||||||
break;
|
break;
|
||||||
case CV_8UC4:
|
case CV_8UC4:
|
||||||
if (hn == 1)
|
if (hn == 1)
|
||||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||||
FastNlMeansMultiDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, int>(
|
FastNlMeansMultiDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, int>(
|
||||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||||
dst, templateWindowSize, searchWindowSize, &h[0]));
|
dst, templateWindowSize, searchWindowSize, &h[0]),
|
||||||
|
granularity);
|
||||||
else
|
else
|
||||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||||
FastNlMeansMultiDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, Vec4i>(
|
FastNlMeansMultiDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, Vec4i>(
|
||||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||||
dst, templateWindowSize, searchWindowSize, &h[0]));
|
dst, templateWindowSize, searchWindowSize, &h[0]),
|
||||||
|
granularity);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
CV_Error(Error::StsBadArg,
|
CV_Error(Error::StsBadArg,
|
||||||
|
@@ -156,3 +156,14 @@ TEST(Photo_White, issue_2646)
|
|||||||
|
|
||||||
ASSERT_EQ(0, nonWhitePixelsCount);
|
ASSERT_EQ(0, nonWhitePixelsCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Timing smoke test: runs fastNlMeansDenoising once on the "shared/5MP.png"
// image from the test data directory and prints the elapsed wall time in
// milliseconds. The test contains no assertions, so it only reports timing.
// NOTE(review): the result of imread() is not checked here; if the data file
// is missing, src is presumably empty when passed on -- confirm whether an
// explicit ASSERT on src is wanted.
TEST(Photo_Denoising, speed)
|
||||||
|
{
|
||||||
|
// Build the full path to the input image under the test-suite data root.
string imgname = string(cvtest::TS::ptr()->get_data_path()) + "shared/5MP.png";
|
||||||
|
// Flag 0 presumably requests a single-channel (grayscale) load -- TODO confirm
// against the imread flag documentation.
Mat src = imread(imgname, 0), dst;
|
||||||
|
|
||||||
|
// Record the start tick count.
double t = (double)getTickCount();
|
||||||
|
// Arguments after dst are presumably h = 5, templateWindowSize = 7,
// searchWindowSize = 21 -- verify against the fastNlMeansDenoising signature.
fastNlMeansDenoising(src, dst, 5, 7, 21);
|
||||||
|
// Elapsed ticks for the single denoising call.
t = (double)getTickCount() - t;
|
||||||
|
// Convert ticks to milliseconds using the tick frequency and print the result.
printf("execution time: %gms\n", t*1000./getTickFrequency());
|
||||||
|
}
|
||||||
|
Reference in New Issue
Block a user