Merge pull request #2382 from ilya-lavrenov:tapi_nlmeans
This commit is contained in:
@@ -26,7 +26,7 @@ OCL_PERF_TEST(Photo, DenoisingGrayscale)
|
||||
OCL_TEST_CYCLE()
|
||||
cv::fastNlMeansDenoising(original, result, 10);
|
||||
|
||||
SANITY_CHECK(result);
|
||||
SANITY_CHECK(result, 1);
|
||||
}
|
||||
|
||||
OCL_PERF_TEST(Photo, DenoisingColored)
|
||||
@@ -42,10 +42,10 @@ OCL_PERF_TEST(Photo, DenoisingColored)
|
||||
OCL_TEST_CYCLE()
|
||||
cv::fastNlMeansDenoisingColored(original, result, 10, 10);
|
||||
|
||||
SANITY_CHECK(result);
|
||||
SANITY_CHECK(result, 2);
|
||||
}
|
||||
|
||||
OCL_PERF_TEST(Photo, DenoisingGrayscaleMulti)
|
||||
OCL_PERF_TEST(Photo, DISABLED_DenoisingGrayscaleMulti)
|
||||
{
|
||||
const int imgs_count = 3;
|
||||
|
||||
@@ -68,7 +68,7 @@ OCL_PERF_TEST(Photo, DenoisingGrayscaleMulti)
|
||||
SANITY_CHECK(result);
|
||||
}
|
||||
|
||||
OCL_PERF_TEST(Photo, DenoisingColoredMulti)
|
||||
OCL_PERF_TEST(Photo, DISABLED_DenoisingColoredMulti)
|
||||
{
|
||||
const int imgs_count = 3;
|
||||
|
||||
|
@@ -39,10 +39,14 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "opencv2/core/base.hpp"
|
||||
|
||||
#ifndef __OPENCV_DENOISING_ARRAYS_HPP__
|
||||
#define __OPENCV_DENOISING_ARRAYS_HPP__
|
||||
|
||||
template <class T> struct Array2d {
|
||||
template <class T>
|
||||
struct Array2d
|
||||
{
|
||||
T* a;
|
||||
int n1,n2;
|
||||
bool needToDeallocArray;
|
||||
@@ -50,14 +54,16 @@ template <class T> struct Array2d {
|
||||
Array2d(const Array2d& array2d):
|
||||
a(array2d.a), n1(array2d.n1), n2(array2d.n2), needToDeallocArray(false)
|
||||
{
|
||||
if (array2d.needToDeallocArray) {
|
||||
// copy constructor for self allocating arrays not supported
|
||||
throw new std::exception();
|
||||
if (array2d.needToDeallocArray)
|
||||
{
|
||||
CV_Error(Error::BadDataPtr, "Copy constructor for self allocating arrays not supported");
|
||||
}
|
||||
}
|
||||
|
||||
Array2d(T* _a, int _n1, int _n2):
|
||||
a(_a), n1(_n1), n2(_n2), needToDeallocArray(false) {}
|
||||
a(_a), n1(_n1), n2(_n2), needToDeallocArray(false)
|
||||
{
|
||||
}
|
||||
|
||||
Array2d(int _n1, int _n2):
|
||||
n1(_n1), n2(_n2), needToDeallocArray(true)
|
||||
@@ -65,28 +71,34 @@ template <class T> struct Array2d {
|
||||
a = new T[n1*n2];
|
||||
}
|
||||
|
||||
~Array2d() {
|
||||
if (needToDeallocArray) {
|
||||
~Array2d()
|
||||
{
|
||||
if (needToDeallocArray)
|
||||
delete[] a;
|
||||
}
|
||||
}
|
||||
|
||||
T* operator [] (int i) {
|
||||
T* operator [] (int i)
|
||||
{
|
||||
return a + i*n2;
|
||||
}
|
||||
|
||||
inline T* row_ptr(int i) {
|
||||
inline T* row_ptr(int i)
|
||||
{
|
||||
return (*this)[i];
|
||||
}
|
||||
};
|
||||
|
||||
template <class T> struct Array3d {
|
||||
template <class T>
|
||||
struct Array3d
|
||||
{
|
||||
T* a;
|
||||
int n1,n2,n3;
|
||||
bool needToDeallocArray;
|
||||
|
||||
Array3d(T* _a, int _n1, int _n2, int _n3):
|
||||
a(_a), n1(_n1), n2(_n2), n3(_n3), needToDeallocArray(false) {}
|
||||
a(_a), n1(_n1), n2(_n2), n3(_n3), needToDeallocArray(false)
|
||||
{
|
||||
}
|
||||
|
||||
Array3d(int _n1, int _n2, int _n3):
|
||||
n1(_n1), n2(_n2), n3(_n3), needToDeallocArray(true)
|
||||
@@ -94,64 +106,72 @@ template <class T> struct Array3d {
|
||||
a = new T[n1*n2*n3];
|
||||
}
|
||||
|
||||
~Array3d() {
|
||||
if (needToDeallocArray) {
|
||||
~Array3d()
|
||||
{
|
||||
if (needToDeallocArray)
|
||||
delete[] a;
|
||||
}
|
||||
}
|
||||
|
||||
Array2d<T> operator [] (int i) {
|
||||
Array2d<T> operator [] (int i)
|
||||
{
|
||||
Array2d<T> array2d(a + i*n2*n3, n2, n3);
|
||||
return array2d;
|
||||
}
|
||||
|
||||
inline T* row_ptr(int i1, int i2) {
|
||||
inline T* row_ptr(int i1, int i2)
|
||||
{
|
||||
return a + i1*n2*n3 + i2*n3;
|
||||
}
|
||||
};
|
||||
|
||||
template <class T> struct Array4d {
|
||||
template <class T>
|
||||
struct Array4d
|
||||
{
|
||||
T* a;
|
||||
int n1,n2,n3,n4;
|
||||
bool needToDeallocArray;
|
||||
int steps[4];
|
||||
|
||||
void init_steps() {
|
||||
void init_steps()
|
||||
{
|
||||
steps[0] = n2*n3*n4;
|
||||
steps[1] = n3*n4;
|
||||
steps[2] = n4;
|
||||
steps[3] = 1;
|
||||
}
|
||||
|
||||
Array4d(T* _a, int _n1, int _n2, int _n3, int _n4):
|
||||
Array4d(T* _a, int _n1, int _n2, int _n3, int _n4) :
|
||||
a(_a), n1(_n1), n2(_n2), n3(_n3), n4(_n4), needToDeallocArray(false)
|
||||
{
|
||||
{
|
||||
init_steps();
|
||||
}
|
||||
}
|
||||
|
||||
Array4d(int _n1, int _n2, int _n3, int _n4):
|
||||
Array4d(int _n1, int _n2, int _n3, int _n4) :
|
||||
n1(_n1), n2(_n2), n3(_n3), n4(_n4), needToDeallocArray(true)
|
||||
{
|
||||
a = new T[n1*n2*n3*n4];
|
||||
init_steps();
|
||||
}
|
||||
|
||||
~Array4d() {
|
||||
if (needToDeallocArray) {
|
||||
delete[] a;
|
||||
}
|
||||
}
|
||||
|
||||
Array3d<T> operator [] (int i) {
|
||||
~Array4d()
|
||||
{
|
||||
if (needToDeallocArray)
|
||||
delete[] a;
|
||||
}
|
||||
|
||||
Array3d<T> operator [] (int i)
|
||||
{
|
||||
Array3d<T> array3d(a + i*n2*n3*n4, n2, n3, n4);
|
||||
return array3d;
|
||||
}
|
||||
|
||||
inline T* row_ptr(int i1, int i2, int i3) {
|
||||
inline T* row_ptr(int i1, int i2, int i3)
|
||||
{
|
||||
return a + i1*n2*n3*n4 + i2*n3*n4 + i3*n4;
|
||||
}
|
||||
|
||||
inline int step_size(int dimension) {
|
||||
inline int step_size(int dimension)
|
||||
{
|
||||
return steps[dimension];
|
||||
}
|
||||
};
|
||||
|
@@ -40,14 +40,17 @@
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "opencv2/photo.hpp"
|
||||
#include "opencv2/imgproc.hpp"
|
||||
|
||||
#include "fast_nlmeans_denoising_invoker.hpp"
|
||||
#include "fast_nlmeans_multi_denoising_invoker.hpp"
|
||||
#include "fast_nlmeans_denoising_opencl.hpp"
|
||||
|
||||
void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h,
|
||||
int templateWindowSize, int searchWindowSize)
|
||||
{
|
||||
CV_OCL_RUN(_src.dims() <= 2 && (_src.isUMat() || _dst.isUMat()),
|
||||
ocl_fastNlMeansDenoising(_src, _dst, h, templateWindowSize, searchWindowSize))
|
||||
|
||||
Mat src = _src.getMat();
|
||||
_dst.create(src.size(), src.type());
|
||||
Mat dst = _dst.getMat();
|
||||
@@ -83,15 +86,20 @@ void cv::fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
|
||||
float h, float hForColorComponents,
|
||||
int templateWindowSize, int searchWindowSize)
|
||||
{
|
||||
Mat src = _src.getMat();
|
||||
_dst.create(src.size(), src.type());
|
||||
Mat dst = _dst.getMat();
|
||||
|
||||
if (src.type() != CV_8UC3) {
|
||||
if (_src.type() != CV_8UC3)
|
||||
{
|
||||
CV_Error(Error::StsBadArg, "Type of input image should be CV_8UC3!");
|
||||
return;
|
||||
}
|
||||
|
||||
CV_OCL_RUN(_src.dims() <= 2 && (_dst.isUMat() || _src.isUMat()),
|
||||
ocl_fastNlMeansDenoisingColored(_src, _dst, h, hForColorComponents,
|
||||
templateWindowSize, searchWindowSize))
|
||||
|
||||
Mat src = _src.getMat();
|
||||
_dst.create(src.size(), src.type());
|
||||
Mat dst = _dst.getMat();
|
||||
|
||||
Mat src_lab;
|
||||
cvtColor(src, src_lab, COLOR_LBGR2Lab);
|
||||
|
||||
@@ -117,7 +125,8 @@ static void fastNlMeansDenoisingMultiCheckPreconditions(
|
||||
int templateWindowSize, int searchWindowSize)
|
||||
{
|
||||
int src_imgs_size = static_cast<int>(srcImgs.size());
|
||||
if (src_imgs_size == 0) {
|
||||
if (src_imgs_size == 0)
|
||||
{
|
||||
CV_Error(Error::StsBadArg, "Input images vector should not be empty!");
|
||||
}
|
||||
|
||||
@@ -136,11 +145,11 @@ static void fastNlMeansDenoisingMultiCheckPreconditions(
|
||||
"should be chosen corresponding srcImgs size!");
|
||||
}
|
||||
|
||||
for (int i = 1; i < src_imgs_size; i++) {
|
||||
if (srcImgs[0].size() != srcImgs[i].size() || srcImgs[0].type() != srcImgs[i].type()) {
|
||||
for (int i = 1; i < src_imgs_size; i++)
|
||||
if (srcImgs[0].size() != srcImgs[i].size() || srcImgs[0].type() != srcImgs[i].type())
|
||||
{
|
||||
CV_Error(Error::StsBadArg, "Input images should have the same size and type!");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _dst,
|
||||
@@ -152,12 +161,13 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds
|
||||
|
||||
fastNlMeansDenoisingMultiCheckPreconditions(
|
||||
srcImgs, imgToDenoiseIndex,
|
||||
temporalWindowSize, templateWindowSize, searchWindowSize
|
||||
);
|
||||
temporalWindowSize, templateWindowSize, searchWindowSize);
|
||||
|
||||
_dst.create(srcImgs[0].size(), srcImgs[0].type());
|
||||
Mat dst = _dst.getMat();
|
||||
|
||||
switch (srcImgs[0].type()) {
|
||||
switch (srcImgs[0].type())
|
||||
{
|
||||
case CV_8U:
|
||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||
FastNlMeansMultiDenoisingInvoker<uchar>(
|
||||
@@ -192,15 +202,15 @@ void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputAr
|
||||
|
||||
fastNlMeansDenoisingMultiCheckPreconditions(
|
||||
srcImgs, imgToDenoiseIndex,
|
||||
temporalWindowSize, templateWindowSize, searchWindowSize
|
||||
);
|
||||
temporalWindowSize, templateWindowSize, searchWindowSize);
|
||||
|
||||
_dst.create(srcImgs[0].size(), srcImgs[0].type());
|
||||
Mat dst = _dst.getMat();
|
||||
|
||||
int src_imgs_size = static_cast<int>(srcImgs.size());
|
||||
|
||||
if (srcImgs[0].type() != CV_8UC3) {
|
||||
if (srcImgs[0].type() != CV_8UC3)
|
||||
{
|
||||
CV_Error(Error::StsBadArg, "Type of input images should be CV_8UC3!");
|
||||
return;
|
||||
}
|
||||
@@ -211,7 +221,8 @@ void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputAr
|
||||
std::vector<Mat> src_lab(src_imgs_size);
|
||||
std::vector<Mat> l(src_imgs_size);
|
||||
std::vector<Mat> ab(src_imgs_size);
|
||||
for (int i = 0; i < src_imgs_size; i++) {
|
||||
for (int i = 0; i < src_imgs_size; i++)
|
||||
{
|
||||
src_lab[i] = Mat::zeros(srcImgs[0].size(), CV_8UC3);
|
||||
l[i] = Mat::zeros(srcImgs[0].size(), CV_8UC1);
|
||||
ab[i] = Mat::zeros(srcImgs[0].size(), CV_8UC2);
|
||||
|
@@ -51,61 +51,61 @@
|
||||
using namespace cv;
|
||||
|
||||
template <typename T>
|
||||
struct FastNlMeansDenoisingInvoker : ParallelLoopBody {
|
||||
public:
|
||||
FastNlMeansDenoisingInvoker(const Mat& src, Mat& dst,
|
||||
int template_window_size, int search_window_size, const float h);
|
||||
struct FastNlMeansDenoisingInvoker :
|
||||
public ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
FastNlMeansDenoisingInvoker(const Mat& src, Mat& dst,
|
||||
int template_window_size, int search_window_size, const float h);
|
||||
|
||||
void operator() (const Range& range) const;
|
||||
void operator() (const Range& range) const;
|
||||
|
||||
private:
|
||||
void operator= (const FastNlMeansDenoisingInvoker&);
|
||||
private:
|
||||
void operator= (const FastNlMeansDenoisingInvoker&);
|
||||
|
||||
const Mat& src_;
|
||||
Mat& dst_;
|
||||
const Mat& src_;
|
||||
Mat& dst_;
|
||||
|
||||
Mat extended_src_;
|
||||
int border_size_;
|
||||
Mat extended_src_;
|
||||
int border_size_;
|
||||
|
||||
int template_window_size_;
|
||||
int search_window_size_;
|
||||
int template_window_size_;
|
||||
int search_window_size_;
|
||||
|
||||
int template_window_half_size_;
|
||||
int search_window_half_size_;
|
||||
int template_window_half_size_;
|
||||
int search_window_half_size_;
|
||||
|
||||
int fixed_point_mult_;
|
||||
int almost_template_window_size_sq_bin_shift_;
|
||||
std::vector<int> almost_dist2weight_;
|
||||
int fixed_point_mult_;
|
||||
int almost_template_window_size_sq_bin_shift_;
|
||||
std::vector<int> almost_dist2weight_;
|
||||
|
||||
void calcDistSumsForFirstElementInRow(
|
||||
int i,
|
||||
Array2d<int>& dist_sums,
|
||||
Array3d<int>& col_dist_sums,
|
||||
Array3d<int>& up_col_dist_sums) const;
|
||||
void calcDistSumsForFirstElementInRow(
|
||||
int i, Array2d<int>& dist_sums,
|
||||
Array3d<int>& col_dist_sums,
|
||||
Array3d<int>& up_col_dist_sums) const;
|
||||
|
||||
void calcDistSumsForElementInFirstRow(
|
||||
int i,
|
||||
int j,
|
||||
int first_col_num,
|
||||
Array2d<int>& dist_sums,
|
||||
Array3d<int>& col_dist_sums,
|
||||
Array3d<int>& up_col_dist_sums) const;
|
||||
void calcDistSumsForElementInFirstRow(
|
||||
int i, int j, int first_col_num,
|
||||
Array2d<int>& dist_sums,
|
||||
Array3d<int>& col_dist_sums,
|
||||
Array3d<int>& up_col_dist_sums) const;
|
||||
};
|
||||
|
||||
inline int getNearestPowerOf2(int value)
|
||||
{
|
||||
int p = 0;
|
||||
while( 1 << p < value) ++p;
|
||||
while( 1 << p < value)
|
||||
++p;
|
||||
return p;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
|
||||
const cv::Mat& src,
|
||||
cv::Mat& dst,
|
||||
const Mat& src, Mat& dst,
|
||||
int template_window_size,
|
||||
int search_window_size,
|
||||
const float h) : src_(src), dst_(dst)
|
||||
const float h) :
|
||||
src_(src), dst_(dst)
|
||||
{
|
||||
CV_Assert(src.channels() == sizeof(T)); //T is Vec1b or Vec2b or Vec3b
|
||||
|
||||
@@ -115,26 +115,25 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
|
||||
search_window_size_ = search_window_half_size_ * 2 + 1;
|
||||
|
||||
border_size_ = search_window_half_size_ + template_window_half_size_;
|
||||
copyMakeBorder(src_, extended_src_,
|
||||
border_size_, border_size_, border_size_, border_size_, cv::BORDER_DEFAULT);
|
||||
copyMakeBorder(src_, extended_src_, border_size_, border_size_, border_size_, border_size_, BORDER_DEFAULT);
|
||||
|
||||
const int max_estimate_sum_value = search_window_size_ * search_window_size_ * 255;
|
||||
fixed_point_mult_ = std::numeric_limits<int>::max() / max_estimate_sum_value;
|
||||
|
||||
// precalc weight for every possible l2 dist between blocks
|
||||
// additional optimization of precalced weights to replace division(averaging) by binary shift
|
||||
|
||||
CV_Assert(template_window_size_ <= 46340 ); // sqrt(INT_MAX)
|
||||
CV_Assert(template_window_size_ <= 46340); // sqrt(INT_MAX)
|
||||
int template_window_size_sq = template_window_size_ * template_window_size_;
|
||||
almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq);
|
||||
double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq;
|
||||
|
||||
int max_dist = 255 * 255 * sizeof(T);
|
||||
int almost_max_dist = (int) (max_dist / almost_dist2actual_dist_multiplier + 1);
|
||||
int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1);
|
||||
almost_dist2weight_.resize(almost_max_dist);
|
||||
|
||||
const double WEIGHT_THRESHOLD = 0.001;
|
||||
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) {
|
||||
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
|
||||
{
|
||||
double dist = almost_dist * almost_dist2actual_dist_multiplier;
|
||||
int weight = cvRound(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T))));
|
||||
|
||||
@@ -144,50 +143,56 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
|
||||
almost_dist2weight_[almost_dist] = weight;
|
||||
}
|
||||
CV_Assert(almost_dist2weight_[0] == fixed_point_mult_);
|
||||
// additional optimization init end
|
||||
|
||||
if (dst_.empty()) {
|
||||
// additional optimization init end
|
||||
if (dst_.empty())
|
||||
dst_ = Mat::zeros(src_.size(), src_.type());
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const {
|
||||
void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
|
||||
{
|
||||
int row_from = range.start;
|
||||
int row_to = range.end - 1;
|
||||
|
||||
// sums of cols anf rows for current pixel p
|
||||
Array2d<int> dist_sums(search_window_size_, search_window_size_);
|
||||
|
||||
// for lazy calc optimization
|
||||
// for lazy calc optimization (sum of cols for current pixel)
|
||||
Array3d<int> col_dist_sums(template_window_size_, search_window_size_, search_window_size_);
|
||||
|
||||
int first_col_num = -1;
|
||||
// last elements of column sum (for each element in row)
|
||||
Array3d<int> up_col_dist_sums(src_.cols, search_window_size_, search_window_size_);
|
||||
|
||||
for (int i = row_from; i <= row_to; i++) {
|
||||
for (int j = 0; j < src_.cols; j++) {
|
||||
for (int i = row_from; i <= row_to; i++)
|
||||
{
|
||||
for (int j = 0; j < src_.cols; j++)
|
||||
{
|
||||
int search_window_y = i - search_window_half_size_;
|
||||
int search_window_x = j - search_window_half_size_;
|
||||
|
||||
// calc dist_sums
|
||||
if (j == 0) {
|
||||
if (j == 0)
|
||||
{
|
||||
calcDistSumsForFirstElementInRow(i, dist_sums, col_dist_sums, up_col_dist_sums);
|
||||
first_col_num = 0;
|
||||
|
||||
} else { // calc cur dist_sums using previous dist_sums
|
||||
if (i == row_from) {
|
||||
}
|
||||
else
|
||||
{
|
||||
// calc cur dist_sums using previous dist_sums
|
||||
if (i == row_from)
|
||||
{
|
||||
calcDistSumsForElementInFirstRow(i, j, first_col_num,
|
||||
dist_sums, col_dist_sums, up_col_dist_sums);
|
||||
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
int ay = border_size_ + i;
|
||||
int ax = border_size_ + j + template_window_half_size_;
|
||||
|
||||
int start_by =
|
||||
border_size_ + i - search_window_half_size_;
|
||||
|
||||
int start_bx =
|
||||
border_size_ + j - search_window_half_size_ + template_window_half_size_;
|
||||
int start_by = border_size_ + i - search_window_half_size_;
|
||||
int start_bx = border_size_ + j - search_window_half_size_ + template_window_half_size_;
|
||||
|
||||
T a_up = extended_src_.at<T>(ay - template_window_half_size_ - 1, ax);
|
||||
T a_down = extended_src_.at<T>(ay + template_window_half_size_, ax);
|
||||
@@ -195,33 +200,25 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const {
|
||||
// copy class member to local variable for optimization
|
||||
int search_window_size = search_window_size_;
|
||||
|
||||
for (int y = 0; y < search_window_size; y++) {
|
||||
int* dist_sums_row = dist_sums.row_ptr(y);
|
||||
for (int y = 0; y < search_window_size; y++)
|
||||
{
|
||||
int * dist_sums_row = dist_sums.row_ptr(y);
|
||||
int * col_dist_sums_row = col_dist_sums.row_ptr(first_col_num, y);
|
||||
int * up_col_dist_sums_row = up_col_dist_sums.row_ptr(j, y);
|
||||
|
||||
int* col_dist_sums_row = col_dist_sums.row_ptr(first_col_num,y);
|
||||
const T * b_up_ptr = extended_src_.ptr<T>(start_by - template_window_half_size_ - 1 + y);
|
||||
const T * b_down_ptr = extended_src_.ptr<T>(start_by + template_window_half_size_ + y);
|
||||
|
||||
int* up_col_dist_sums_row = up_col_dist_sums.row_ptr(j, y);
|
||||
|
||||
const T* b_up_ptr =
|
||||
extended_src_.ptr<T>(start_by - template_window_half_size_ - 1 + y);
|
||||
|
||||
const T* b_down_ptr =
|
||||
extended_src_.ptr<T>(start_by + template_window_half_size_ + y);
|
||||
|
||||
for (int x = 0; x < search_window_size; x++) {
|
||||
for (int x = 0; x < search_window_size; x++)
|
||||
{
|
||||
// remove from current pixel sum column sum with index "first_col_num"
|
||||
dist_sums_row[x] -= col_dist_sums_row[x];
|
||||
|
||||
col_dist_sums_row[x] =
|
||||
up_col_dist_sums_row[x] +
|
||||
calcUpDownDist(
|
||||
a_up, a_down,
|
||||
b_up_ptr[start_bx + x], b_down_ptr[start_bx + x]
|
||||
);
|
||||
int bx = start_bx + x;
|
||||
col_dist_sums_row[x] = up_col_dist_sums_row[x] + calcUpDownDist(a_up, a_down, b_up_ptr[bx], b_down_ptr[bx]);
|
||||
|
||||
dist_sums_row[x] += col_dist_sums_row[x];
|
||||
|
||||
up_col_dist_sums_row[x] = col_dist_sums_row[x];
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -230,20 +227,17 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const {
|
||||
}
|
||||
|
||||
// calc weights
|
||||
int weights_sum = 0;
|
||||
|
||||
int estimation[3];
|
||||
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++) {
|
||||
int estimation[3], weights_sum = 0;
|
||||
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
|
||||
estimation[channel_num] = 0;
|
||||
}
|
||||
|
||||
for (int y = 0; y < search_window_size_; y++) {
|
||||
for (int y = 0; y < search_window_size_; y++)
|
||||
{
|
||||
const T* cur_row_ptr = extended_src_.ptr<T>(border_size_ + search_window_y + y);
|
||||
int* dist_sums_row = dist_sums.row_ptr(y);
|
||||
for (int x = 0; x < search_window_size_; x++) {
|
||||
int almostAvgDist =
|
||||
dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_;
|
||||
|
||||
for (int x = 0; x < search_window_size_; x++)
|
||||
{
|
||||
int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_;
|
||||
int weight = almost_dist2weight_[almostAvgDist];
|
||||
weights_sum += weight;
|
||||
|
||||
@@ -269,18 +263,19 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
|
||||
{
|
||||
int j = 0;
|
||||
|
||||
for (int y = 0; y < search_window_size_; y++) {
|
||||
for (int x = 0; x < search_window_size_; x++) {
|
||||
for (int y = 0; y < search_window_size_; y++)
|
||||
for (int x = 0; x < search_window_size_; x++)
|
||||
{
|
||||
dist_sums[y][x] = 0;
|
||||
for (int tx = 0; tx < template_window_size_; tx++) {
|
||||
for (int tx = 0; tx < template_window_size_; tx++)
|
||||
col_dist_sums[tx][y][x] = 0;
|
||||
}
|
||||
|
||||
int start_y = i + y - search_window_half_size_;
|
||||
int start_x = j + x - search_window_half_size_;
|
||||
|
||||
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) {
|
||||
for (int tx = -template_window_half_size_; tx <= template_window_half_size_; tx++) {
|
||||
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
|
||||
for (int tx = -template_window_half_size_; tx <= template_window_half_size_; tx++)
|
||||
{
|
||||
int dist = calcDist<T>(extended_src_,
|
||||
border_size_ + i + ty, border_size_ + j + tx,
|
||||
border_size_ + start_y + ty, border_size_ + start_x + tx);
|
||||
@@ -288,18 +283,14 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
|
||||
dist_sums[y][x] += dist;
|
||||
col_dist_sums[tx + template_window_half_size_][y][x] += dist;
|
||||
}
|
||||
}
|
||||
|
||||
up_col_dist_sums[j][y][x] = col_dist_sums[template_window_size_ - 1][y][x];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForElementInFirstRow(
|
||||
int i,
|
||||
int j,
|
||||
int first_col_num,
|
||||
int i, int j, int first_col_num,
|
||||
Array2d<int>& dist_sums,
|
||||
Array3d<int>& col_dist_sums,
|
||||
Array3d<int>& up_col_dist_sums) const
|
||||
@@ -312,23 +303,20 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForElementInFirstRow(
|
||||
|
||||
int new_last_col_num = first_col_num;
|
||||
|
||||
for (int y = 0; y < search_window_size_; y++) {
|
||||
for (int x = 0; x < search_window_size_; x++) {
|
||||
for (int y = 0; y < search_window_size_; y++)
|
||||
for (int x = 0; x < search_window_size_; x++)
|
||||
{
|
||||
dist_sums[y][x] -= col_dist_sums[first_col_num][y][x];
|
||||
|
||||
col_dist_sums[new_last_col_num][y][x] = 0;
|
||||
int by = start_by + y;
|
||||
int bx = start_bx + x;
|
||||
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) {
|
||||
col_dist_sums[new_last_col_num][y][x] +=
|
||||
calcDist<T>(extended_src_, ay + ty, ax, by + ty, bx);
|
||||
}
|
||||
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
|
||||
col_dist_sums[new_last_col_num][y][x] += calcDist<T>(extended_src_, ay + ty, ax, by + ty, bx);
|
||||
|
||||
dist_sums[y][x] += col_dist_sums[new_last_col_num][y][x];
|
||||
|
||||
up_col_dist_sums[j][y][x] = col_dist_sums[new_last_col_num][y][x];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -46,29 +46,35 @@ using namespace cv;
|
||||
|
||||
template <typename T> static inline int calcDist(const T a, const T b);
|
||||
|
||||
template <> inline int calcDist(const uchar a, const uchar b) {
|
||||
template <> inline int calcDist(const uchar a, const uchar b)
|
||||
{
|
||||
return (a-b) * (a-b);
|
||||
}
|
||||
|
||||
template <> inline int calcDist(const Vec2b a, const Vec2b b) {
|
||||
template <> inline int calcDist(const Vec2b a, const Vec2b b)
|
||||
{
|
||||
return (a[0]-b[0])*(a[0]-b[0]) + (a[1]-b[1])*(a[1]-b[1]);
|
||||
}
|
||||
|
||||
template <> inline int calcDist(const Vec3b a, const Vec3b b) {
|
||||
template <> inline int calcDist(const Vec3b a, const Vec3b b)
|
||||
{
|
||||
return (a[0]-b[0])*(a[0]-b[0]) + (a[1]-b[1])*(a[1]-b[1]) + (a[2]-b[2])*(a[2]-b[2]);
|
||||
}
|
||||
|
||||
template <typename T> static inline int calcDist(const Mat& m, int i1, int j1, int i2, int j2) {
|
||||
template <typename T> static inline int calcDist(const Mat& m, int i1, int j1, int i2, int j2)
|
||||
{
|
||||
const T a = m.at<T>(i1, j1);
|
||||
const T b = m.at<T>(i2, j2);
|
||||
return calcDist<T>(a,b);
|
||||
}
|
||||
|
||||
template <typename T> static inline int calcUpDownDist(T a_up, T a_down, T b_up, T b_down) {
|
||||
return calcDist(a_down,b_down) - calcDist(a_up, b_up);
|
||||
template <typename T> static inline int calcUpDownDist(T a_up, T a_down, T b_up, T b_down)
|
||||
{
|
||||
return calcDist(a_down, b_down) - calcDist(a_up, b_up);
|
||||
}
|
||||
|
||||
template <> inline int calcUpDownDist(uchar a_up, uchar a_down, uchar b_up, uchar b_down) {
|
||||
template <> inline int calcUpDownDist(uchar a_up, uchar a_down, uchar b_up, uchar b_down)
|
||||
{
|
||||
int A = a_down - b_down;
|
||||
int B = a_up - b_up;
|
||||
return (A-B)*(A+B);
|
||||
@@ -76,16 +82,37 @@ template <> inline int calcUpDownDist(uchar a_up, uchar a_down, uchar b_up, uch
|
||||
|
||||
template <typename T> static inline void incWithWeight(int* estimation, int weight, T p);
|
||||
|
||||
template <> inline void incWithWeight(int* estimation, int weight, uchar p) {
|
||||
template <> inline void incWithWeight(int* estimation, int weight, uchar p)
|
||||
{
|
||||
estimation[0] += weight * p;
|
||||
}
|
||||
|
||||
template <> inline void incWithWeight(int* estimation, int weight, Vec2b p) {
|
||||
template <> inline void incWithWeight(int* estimation, int weight, Vec2b p)
|
||||
{
|
||||
estimation[0] += weight * p[0];
|
||||
estimation[1] += weight * p[1];
|
||||
}
|
||||
|
||||
template <> inline void incWithWeight(int* estimation, int weight, Vec3b p) {
|
||||
template <> inline void incWithWeight(int* estimation, int weight, Vec3b p)
|
||||
{
|
||||
estimation[0] += weight * p[0];
|
||||
estimation[1] += weight * p[1];
|
||||
estimation[2] += weight * p[2];
|
||||
}
|
||||
|
||||
template <> inline void incWithWeight(int* estimation, int weight, int p)
|
||||
{
|
||||
estimation[0] += weight * p;
|
||||
}
|
||||
|
||||
template <> inline void incWithWeight(int* estimation, int weight, Vec2i p)
|
||||
{
|
||||
estimation[0] += weight * p[0];
|
||||
estimation[1] += weight * p[1];
|
||||
}
|
||||
|
||||
template <> inline void incWithWeight(int* estimation, int weight, Vec3i p)
|
||||
{
|
||||
estimation[0] += weight * p[0];
|
||||
estimation[1] += weight * p[1];
|
||||
estimation[2] += weight * p[2];
|
||||
@@ -93,18 +120,21 @@ template <> inline void incWithWeight(int* estimation, int weight, Vec3b p) {
|
||||
|
||||
template <typename T> static inline T saturateCastFromArray(int* estimation);
|
||||
|
||||
template <> inline uchar saturateCastFromArray(int* estimation) {
|
||||
template <> inline uchar saturateCastFromArray(int* estimation)
|
||||
{
|
||||
return saturate_cast<uchar>(estimation[0]);
|
||||
}
|
||||
|
||||
template <> inline Vec2b saturateCastFromArray(int* estimation) {
|
||||
template <> inline Vec2b saturateCastFromArray(int* estimation)
|
||||
{
|
||||
Vec2b res;
|
||||
res[0] = saturate_cast<uchar>(estimation[0]);
|
||||
res[1] = saturate_cast<uchar>(estimation[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template <> inline Vec3b saturateCastFromArray(int* estimation) {
|
||||
template <> inline Vec3b saturateCastFromArray(int* estimation)
|
||||
{
|
||||
Vec3b res;
|
||||
res[0] = saturate_cast<uchar>(estimation[0]);
|
||||
res[1] = saturate_cast<uchar>(estimation[1]);
|
||||
@@ -112,4 +142,20 @@ template <> inline Vec3b saturateCastFromArray(int* estimation) {
|
||||
return res;
|
||||
}
|
||||
|
||||
template <> inline int saturateCastFromArray(int* estimation)
|
||||
{
|
||||
return estimation[0];
|
||||
}
|
||||
|
||||
template <> inline Vec2i saturateCastFromArray(int* estimation)
|
||||
{
|
||||
estimation[1] = 0;
|
||||
return Vec2i(estimation);
|
||||
}
|
||||
|
||||
template <> inline Vec3i saturateCastFromArray(int* estimation)
|
||||
{
|
||||
return Vec3i(estimation);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
162
modules/photo/src/fast_nlmeans_denoising_opencl.hpp
Normal file
162
modules/photo/src/fast_nlmeans_denoising_opencl.hpp
Normal file
@@ -0,0 +1,162 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
#ifndef __OPENCV_FAST_NLMEANS_DENOISING_OPENCL_HPP__
|
||||
#define __OPENCV_FAST_NLMEANS_DENOISING_OPENCL_HPP__
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "opencl_kernels.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
namespace cv {
|
||||
|
||||
enum
|
||||
{
|
||||
BLOCK_ROWS = 32,
|
||||
BLOCK_COLS = 32,
|
||||
CTA_SIZE = 256
|
||||
};
|
||||
|
||||
static int divUp(int a, int b)
|
||||
{
|
||||
return (a + b - 1) / b;
|
||||
}
|
||||
|
||||
template <typename FT>
|
||||
static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindowSize, int templateWindowSize, FT h, int cn,
|
||||
int & almostTemplateWindowSizeSqBinShift)
|
||||
{
|
||||
const int maxEstimateSumValue = searchWindowSize * searchWindowSize * 255;
|
||||
int fixedPointMult = std::numeric_limits<int>::max() / maxEstimateSumValue;
|
||||
int depth = DataType<FT>::depth;
|
||||
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
|
||||
|
||||
if (depth == CV_64F && !doubleSupport)
|
||||
return false;
|
||||
|
||||
// precalc weight for every possible l2 dist between blocks
|
||||
// additional optimization of precalced weights to replace division(averaging) by binary shift
|
||||
CV_Assert(templateWindowSize <= 46340); // sqrt(INT_MAX)
|
||||
int templateWindowSizeSq = templateWindowSize * templateWindowSize;
|
||||
almostTemplateWindowSizeSqBinShift = getNearestPowerOf2(templateWindowSizeSq);
|
||||
FT almostDist2ActualDistMultiplier = (FT)(1 << almostTemplateWindowSizeSqBinShift) / templateWindowSizeSq;
|
||||
|
||||
const FT WEIGHT_THRESHOLD = 1e-3f;
|
||||
int maxDist = 255 * 255 * cn;
|
||||
int almostMaxDist = (int)(maxDist / almostDist2ActualDistMultiplier + 1);
|
||||
FT den = 1.0f / (h * h * cn);
|
||||
|
||||
almostDist2Weight.create(1, almostMaxDist, CV_32SC1);
|
||||
|
||||
ocl::Kernel k("calcAlmostDist2Weight", ocl::photo::nlmeans_oclsrc,
|
||||
format("-D OP_CALC_WEIGHTS -D FT=%s%s", ocl::typeToStr(depth),
|
||||
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
|
||||
if (k.empty())
|
||||
return false;
|
||||
|
||||
k.args(ocl::KernelArg::PtrWriteOnly(almostDist2Weight), almostMaxDist,
|
||||
almostDist2ActualDistMultiplier, fixedPointMult, den, WEIGHT_THRESHOLD);
|
||||
|
||||
size_t globalsize[1] = { almostMaxDist };
|
||||
return k.run(1, globalsize, NULL, false);
|
||||
}
|
||||
|
||||
static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
|
||||
int templateWindowSize, int searchWindowSize)
|
||||
{
|
||||
int type = _src.type(), cn = CV_MAT_CN(type);
|
||||
Size size = _src.size();
|
||||
|
||||
if ( type != CV_8UC1 || type != CV_8UC2 || type != CV_8UC4 )
|
||||
return false;
|
||||
|
||||
int templateWindowHalfWize = templateWindowSize / 2;
|
||||
int searchWindowHalfSize = searchWindowSize / 2;
|
||||
templateWindowSize = templateWindowHalfWize * 2 + 1;
|
||||
searchWindowSize = searchWindowHalfSize * 2 + 1;
|
||||
int nblocksx = divUp(size.width, BLOCK_COLS), nblocksy = divUp(size.height, BLOCK_ROWS);
|
||||
int almostTemplateWindowSizeSqBinShift = -1;
|
||||
|
||||
char cvt[2][40];
|
||||
String opts = format("-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d"
|
||||
" -D uchar_t=%s -D int_t=%s -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
|
||||
" -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d"
|
||||
" -D convert_int_t=%s -D cn=%d -D CTA_SIZE2=%d -D convert_uchar_t=%s",
|
||||
templateWindowSize, searchWindowSize, ocl::typeToStr(type),
|
||||
ocl::typeToStr(CV_32SC(cn)), BLOCK_COLS, BLOCK_ROWS, CTA_SIZE,
|
||||
templateWindowHalfWize, searchWindowHalfSize,
|
||||
ocl::convertTypeStr(CV_8U, CV_32S, cn, cvt[0]), cn,
|
||||
CTA_SIZE >> 1, ocl::convertTypeStr(CV_32S, CV_8U, cn, cvt[1]));
|
||||
|
||||
ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts);
|
||||
if (k.empty())
|
||||
return false;
|
||||
|
||||
UMat almostDist2Weight;
|
||||
if (!ocl_calcAlmostDist2Weight<float>(almostDist2Weight, searchWindowSize, templateWindowSize, h, cn,
|
||||
almostTemplateWindowSizeSqBinShift))
|
||||
return false;
|
||||
CV_Assert(almostTemplateWindowSizeSqBinShift >= 0);
|
||||
|
||||
UMat srcex;
|
||||
int borderSize = searchWindowHalfSize + templateWindowHalfWize;
|
||||
copyMakeBorder(_src, srcex, borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT);
|
||||
|
||||
_dst.create(size, type);
|
||||
UMat dst = _dst.getUMat();
|
||||
|
||||
int searchWindowSizeSq = searchWindowSize * searchWindowSize;
|
||||
Size upColSumSize(size.width, searchWindowSizeSq * nblocksy);
|
||||
Size colSumSize(nblocksx * templateWindowSize, searchWindowSizeSq * nblocksy);
|
||||
UMat buffer(upColSumSize + colSumSize, CV_32SC(cn));
|
||||
|
||||
srcex = srcex(Rect(Point(borderSize, borderSize), size));
|
||||
k.args(ocl::KernelArg::ReadOnlyNoSize(srcex), ocl::KernelArg::WriteOnly(dst),
|
||||
ocl::KernelArg::PtrReadOnly(almostDist2Weight),
|
||||
ocl::KernelArg::PtrReadOnly(buffer), almostTemplateWindowSizeSqBinShift);
|
||||
|
||||
size_t globalsize[2] = { nblocksx * CTA_SIZE, nblocksy }, localsize[2] = { CTA_SIZE, 1 };
|
||||
return k.run(2, globalsize, localsize, false);
|
||||
}
|
||||
|
||||
static bool ocl_fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
|
||||
float h, float hForColorComponents,
|
||||
int templateWindowSize, int searchWindowSize)
|
||||
{
|
||||
UMat src = _src.getUMat();
|
||||
_dst.create(src.size(), src.type());
|
||||
UMat dst = _dst.getUMat();
|
||||
|
||||
UMat src_lab;
|
||||
cvtColor(src, src_lab, COLOR_LBGR2Lab);
|
||||
|
||||
UMat l(src.size(), CV_8U);
|
||||
UMat ab(src.size(), CV_8UC2);
|
||||
std::vector<UMat> l_ab(2), l_ab_denoised(2);
|
||||
l_ab[0] = l;
|
||||
l_ab[1] = ab;
|
||||
l_ab_denoised[0].create(src.size(), CV_8U);
|
||||
l_ab_denoised[1].create(src.size(), CV_8UC2);
|
||||
|
||||
int from_to[] = { 0,0, 1,1, 2,2 };
|
||||
mixChannels(std::vector<UMat>(1, src_lab), l_ab, from_to, 3);
|
||||
|
||||
fastNlMeansDenoising(l_ab[0], l_ab_denoised[0], h, templateWindowSize, searchWindowSize);
|
||||
fastNlMeansDenoising(l_ab[1], l_ab_denoised[1], hForColorComponents, templateWindowSize, searchWindowSize);
|
||||
|
||||
UMat dst_lab(src.size(), src.type());
|
||||
mixChannels(l_ab_denoised, std::vector<UMat>(1, dst_lab), from_to, 3);
|
||||
|
||||
cvtColor(dst_lab, dst, COLOR_Lab2LBGR);
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
@@ -51,51 +51,47 @@
|
||||
using namespace cv;
|
||||
|
||||
template <typename T>
|
||||
struct FastNlMeansMultiDenoisingInvoker : ParallelLoopBody {
|
||||
public:
|
||||
FastNlMeansMultiDenoisingInvoker(
|
||||
const std::vector<Mat>& srcImgs, int imgToDenoiseIndex, int temporalWindowSize,
|
||||
Mat& dst, int template_window_size, int search_window_size, const float h);
|
||||
struct FastNlMeansMultiDenoisingInvoker :
|
||||
ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
FastNlMeansMultiDenoisingInvoker(const std::vector<Mat>& srcImgs, int imgToDenoiseIndex,
|
||||
int temporalWindowSize, Mat& dst, int template_window_size,
|
||||
int search_window_size, const float h);
|
||||
|
||||
void operator() (const Range& range) const;
|
||||
void operator() (const Range& range) const;
|
||||
|
||||
private:
|
||||
void operator= (const FastNlMeansMultiDenoisingInvoker&);
|
||||
private:
|
||||
void operator= (const FastNlMeansMultiDenoisingInvoker&);
|
||||
|
||||
int rows_;
|
||||
int cols_;
|
||||
int rows_;
|
||||
int cols_;
|
||||
|
||||
Mat& dst_;
|
||||
Mat& dst_;
|
||||
|
||||
std::vector<Mat> extended_srcs_;
|
||||
Mat main_extended_src_;
|
||||
int border_size_;
|
||||
std::vector<Mat> extended_srcs_;
|
||||
Mat main_extended_src_;
|
||||
int border_size_;
|
||||
|
||||
int template_window_size_;
|
||||
int search_window_size_;
|
||||
int temporal_window_size_;
|
||||
int template_window_size_;
|
||||
int search_window_size_;
|
||||
int temporal_window_size_;
|
||||
|
||||
int template_window_half_size_;
|
||||
int search_window_half_size_;
|
||||
int temporal_window_half_size_;
|
||||
int template_window_half_size_;
|
||||
int search_window_half_size_;
|
||||
int temporal_window_half_size_;
|
||||
|
||||
int fixed_point_mult_;
|
||||
int almost_template_window_size_sq_bin_shift;
|
||||
std::vector<int> almost_dist2weight;
|
||||
int fixed_point_mult_;
|
||||
int almost_template_window_size_sq_bin_shift;
|
||||
std::vector<int> almost_dist2weight;
|
||||
|
||||
void calcDistSumsForFirstElementInRow(
|
||||
int i,
|
||||
Array3d<int>& dist_sums,
|
||||
Array4d<int>& col_dist_sums,
|
||||
Array4d<int>& up_col_dist_sums) const;
|
||||
void calcDistSumsForFirstElementInRow(int i, Array3d<int>& dist_sums,
|
||||
Array4d<int>& col_dist_sums,
|
||||
Array4d<int>& up_col_dist_sums) const;
|
||||
|
||||
void calcDistSumsForElementInFirstRow(
|
||||
int i,
|
||||
int j,
|
||||
int first_col_num,
|
||||
Array3d<int>& dist_sums,
|
||||
Array4d<int>& col_dist_sums,
|
||||
Array4d<int>& up_col_dist_sums) const;
|
||||
void calcDistSumsForElementInFirstRow(int i, int j, int first_col_num,
|
||||
Array3d<int>& dist_sums, Array4d<int>& col_dist_sums,
|
||||
Array4d<int>& up_col_dist_sums) const;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
@@ -106,7 +102,8 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
|
||||
cv::Mat& dst,
|
||||
int template_window_size,
|
||||
int search_window_size,
|
||||
const float h) : dst_(dst), extended_srcs_(srcImgs.size())
|
||||
const float h) :
|
||||
dst_(dst), extended_srcs_(srcImgs.size())
|
||||
{
|
||||
CV_Assert(srcImgs.size() > 0);
|
||||
CV_Assert(srcImgs[0].channels() == sizeof(T));
|
||||
@@ -123,85 +120,84 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
|
||||
temporal_window_size_ = temporal_window_half_size_ * 2 + 1;
|
||||
|
||||
border_size_ = search_window_half_size_ + template_window_half_size_;
|
||||
for (int i = 0; i < temporal_window_size_; i++) {
|
||||
copyMakeBorder(
|
||||
srcImgs[imgToDenoiseIndex - temporal_window_half_size_ + i], extended_srcs_[i],
|
||||
for (int i = 0; i < temporal_window_size_; i++)
|
||||
copyMakeBorder(srcImgs[imgToDenoiseIndex - temporal_window_half_size_ + i], extended_srcs_[i],
|
||||
border_size_, border_size_, border_size_, border_size_, cv::BORDER_DEFAULT);
|
||||
}
|
||||
|
||||
main_extended_src_ = extended_srcs_[temporal_window_half_size_];
|
||||
|
||||
const int max_estimate_sum_value =
|
||||
temporal_window_size_ * search_window_size_ * search_window_size_ * 255;
|
||||
|
||||
const int max_estimate_sum_value = temporal_window_size_ * search_window_size_ * search_window_size_ * 255;
|
||||
fixed_point_mult_ = std::numeric_limits<int>::max() / max_estimate_sum_value;
|
||||
|
||||
// precalc weight for every possible l2 dist between blocks
|
||||
// additional optimization of precalced weights to replace division(averaging) by binary shift
|
||||
int template_window_size_sq = template_window_size_ * template_window_size_;
|
||||
almost_template_window_size_sq_bin_shift = 0;
|
||||
while (1 << almost_template_window_size_sq_bin_shift < template_window_size_sq) {
|
||||
while (1 << almost_template_window_size_sq_bin_shift < template_window_size_sq)
|
||||
almost_template_window_size_sq_bin_shift++;
|
||||
}
|
||||
|
||||
int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift;
|
||||
double almost_dist2actual_dist_multiplier =
|
||||
((double) almost_template_window_size_sq) / template_window_size_sq;
|
||||
double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq;
|
||||
|
||||
int max_dist = 255 * 255 * sizeof(T);
|
||||
int almost_max_dist = (int) (max_dist / almost_dist2actual_dist_multiplier + 1);
|
||||
almost_dist2weight.resize(almost_max_dist);
|
||||
|
||||
const double WEIGHT_THRESHOLD = 0.001;
|
||||
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) {
|
||||
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
|
||||
{
|
||||
double dist = almost_dist * almost_dist2actual_dist_multiplier;
|
||||
int weight = cvRound(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T))));
|
||||
|
||||
if (weight < WEIGHT_THRESHOLD * fixed_point_mult_) {
|
||||
if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
|
||||
weight = 0;
|
||||
}
|
||||
|
||||
almost_dist2weight[almost_dist] = weight;
|
||||
}
|
||||
CV_Assert(almost_dist2weight[0] == fixed_point_mult_);
|
||||
// additional optimization init end
|
||||
|
||||
if (dst_.empty()) {
|
||||
// additional optimization init end
|
||||
if (dst_.empty())
|
||||
dst_ = Mat::zeros(srcImgs[0].size(), srcImgs[0].type());
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const {
|
||||
void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
|
||||
{
|
||||
int row_from = range.start;
|
||||
int row_to = range.end - 1;
|
||||
|
||||
Array3d<int> dist_sums(temporal_window_size_, search_window_size_, search_window_size_);
|
||||
|
||||
// for lazy calc optimization
|
||||
Array4d<int> col_dist_sums(
|
||||
template_window_size_, temporal_window_size_, search_window_size_, search_window_size_);
|
||||
Array4d<int> col_dist_sums(template_window_size_, temporal_window_size_, search_window_size_, search_window_size_);
|
||||
|
||||
int first_col_num = -1;
|
||||
Array4d<int> up_col_dist_sums(cols_, temporal_window_size_, search_window_size_, search_window_size_);
|
||||
|
||||
Array4d<int> up_col_dist_sums(
|
||||
cols_, temporal_window_size_, search_window_size_, search_window_size_);
|
||||
|
||||
for (int i = row_from; i <= row_to; i++) {
|
||||
for (int j = 0; j < cols_; j++) {
|
||||
for (int i = row_from; i <= row_to; i++)
|
||||
{
|
||||
for (int j = 0; j < cols_; j++)
|
||||
{
|
||||
int search_window_y = i - search_window_half_size_;
|
||||
int search_window_x = j - search_window_half_size_;
|
||||
|
||||
// calc dist_sums
|
||||
if (j == 0) {
|
||||
if (j == 0)
|
||||
{
|
||||
calcDistSumsForFirstElementInRow(i, dist_sums, col_dist_sums, up_col_dist_sums);
|
||||
first_col_num = 0;
|
||||
|
||||
} else { // calc cur dist_sums using previous dist_sums
|
||||
if (i == row_from) {
|
||||
}
|
||||
else
|
||||
{
|
||||
// calc cur dist_sums using previous dist_sums
|
||||
if (i == row_from)
|
||||
{
|
||||
calcDistSumsForElementInFirstRow(i, j, first_col_num,
|
||||
dist_sums, col_dist_sums, up_col_dist_sums);
|
||||
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
int ay = border_size_ + i;
|
||||
int ax = border_size_ + j + template_window_half_size_;
|
||||
|
||||
@@ -217,36 +213,31 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
|
||||
// copy class member to local variable for optimization
|
||||
int search_window_size = search_window_size_;
|
||||
|
||||
for (int d = 0; d < temporal_window_size_; d++) {
|
||||
for (int d = 0; d < temporal_window_size_; d++)
|
||||
{
|
||||
Mat cur_extended_src = extended_srcs_[d];
|
||||
Array2d<int> cur_dist_sums = dist_sums[d];
|
||||
Array2d<int> cur_col_dist_sums = col_dist_sums[first_col_num][d];
|
||||
Array2d<int> cur_up_col_dist_sums = up_col_dist_sums[j][d];
|
||||
for (int y = 0; y < search_window_size; y++) {
|
||||
for (int y = 0; y < search_window_size; y++)
|
||||
{
|
||||
int* dist_sums_row = cur_dist_sums.row_ptr(y);
|
||||
|
||||
int* col_dist_sums_row = cur_col_dist_sums.row_ptr(y);
|
||||
|
||||
int* up_col_dist_sums_row = cur_up_col_dist_sums.row_ptr(y);
|
||||
|
||||
const T* b_up_ptr =
|
||||
cur_extended_src.ptr<T>(start_by - template_window_half_size_ - 1 + y);
|
||||
const T* b_down_ptr =
|
||||
cur_extended_src.ptr<T>(start_by + template_window_half_size_ + y);
|
||||
const T* b_up_ptr = cur_extended_src.ptr<T>(start_by - template_window_half_size_ - 1 + y);
|
||||
const T* b_down_ptr = cur_extended_src.ptr<T>(start_by + template_window_half_size_ + y);
|
||||
|
||||
for (int x = 0; x < search_window_size; x++) {
|
||||
for (int x = 0; x < search_window_size; x++)
|
||||
{
|
||||
dist_sums_row[x] -= col_dist_sums_row[x];
|
||||
|
||||
col_dist_sums_row[x] = up_col_dist_sums_row[x] +
|
||||
calcUpDownDist(
|
||||
a_up, a_down,
|
||||
b_up_ptr[start_bx + x], b_down_ptr[start_bx + x]
|
||||
);
|
||||
calcUpDownDist(a_up, a_down, b_up_ptr[start_bx + x], b_down_ptr[start_bx + x]);
|
||||
|
||||
dist_sums_row[x] += col_dist_sums_row[x];
|
||||
|
||||
up_col_dist_sums_row[x] = col_dist_sums_row[x];
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -259,19 +250,21 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
|
||||
int weights_sum = 0;
|
||||
|
||||
int estimation[3];
|
||||
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++) {
|
||||
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
|
||||
estimation[channel_num] = 0;
|
||||
}
|
||||
for (int d = 0; d < temporal_window_size_; d++) {
|
||||
|
||||
for (int d = 0; d < temporal_window_size_; d++)
|
||||
{
|
||||
const Mat& esrc_d = extended_srcs_[d];
|
||||
for (int y = 0; y < search_window_size_; y++) {
|
||||
for (int y = 0; y < search_window_size_; y++)
|
||||
{
|
||||
const T* cur_row_ptr = esrc_d.ptr<T>(border_size_ + search_window_y + y);
|
||||
|
||||
int* dist_sums_row = dist_sums.row_ptr(d, y);
|
||||
|
||||
for (int x = 0; x < search_window_size_; x++) {
|
||||
int almostAvgDist =
|
||||
dist_sums_row[x] >> almost_template_window_size_sq_bin_shift;
|
||||
for (int x = 0; x < search_window_size_; x++)
|
||||
{
|
||||
int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift;
|
||||
|
||||
int weight = almost_dist2weight[almostAvgDist];
|
||||
weights_sum += weight;
|
||||
@@ -293,21 +286,19 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
|
||||
|
||||
template <class T>
|
||||
inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
|
||||
int i,
|
||||
Array3d<int>& dist_sums,
|
||||
Array4d<int>& col_dist_sums,
|
||||
Array4d<int>& up_col_dist_sums) const
|
||||
int i, Array3d<int>& dist_sums, Array4d<int>& col_dist_sums, Array4d<int>& up_col_dist_sums) const
|
||||
{
|
||||
int j = 0;
|
||||
|
||||
for (int d = 0; d < temporal_window_size_; d++) {
|
||||
for (int d = 0; d < temporal_window_size_; d++)
|
||||
{
|
||||
Mat cur_extended_src = extended_srcs_[d];
|
||||
for (int y = 0; y < search_window_size_; y++) {
|
||||
for (int x = 0; x < search_window_size_; x++) {
|
||||
for (int y = 0; y < search_window_size_; y++)
|
||||
for (int x = 0; x < search_window_size_; x++)
|
||||
{
|
||||
dist_sums[d][y][x] = 0;
|
||||
for (int tx = 0; tx < template_window_size_; tx++) {
|
||||
for (int tx = 0; tx < template_window_size_; tx++)
|
||||
col_dist_sums[tx][d][y][x] = 0;
|
||||
}
|
||||
|
||||
int start_y = i + y - search_window_half_size_;
|
||||
int start_x = j + x - search_window_half_size_;
|
||||
@@ -315,14 +306,13 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRo
|
||||
int* dist_sums_ptr = &dist_sums[d][y][x];
|
||||
int* col_dist_sums_ptr = &col_dist_sums[0][d][y][x];
|
||||
int col_dist_sums_step = col_dist_sums.step_size(0);
|
||||
for (int tx = -template_window_half_size_; tx <= template_window_half_size_; tx++) {
|
||||
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) {
|
||||
for (int tx = -template_window_half_size_; tx <= template_window_half_size_; tx++)
|
||||
{
|
||||
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
|
||||
{
|
||||
int dist = calcDist<T>(
|
||||
main_extended_src_.at<T>(
|
||||
border_size_ + i + ty, border_size_ + j + tx),
|
||||
cur_extended_src.at<T>(
|
||||
border_size_ + start_y + ty, border_size_ + start_x + tx)
|
||||
);
|
||||
main_extended_src_.at<T>(border_size_ + i + ty, border_size_ + j + tx),
|
||||
cur_extended_src.at<T>(border_size_ + start_y + ty, border_size_ + start_x + tx));
|
||||
|
||||
*dist_sums_ptr += dist;
|
||||
*col_dist_sums_ptr += dist;
|
||||
@@ -332,18 +322,13 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRo
|
||||
|
||||
up_col_dist_sums[j][d][y][x] = col_dist_sums[template_window_size_ - 1][d][y][x];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForElementInFirstRow(
|
||||
int i,
|
||||
int j,
|
||||
int first_col_num,
|
||||
Array3d<int>& dist_sums,
|
||||
Array4d<int>& col_dist_sums,
|
||||
Array4d<int>& up_col_dist_sums) const
|
||||
int i, int j, int first_col_num, Array3d<int>& dist_sums,
|
||||
Array4d<int>& col_dist_sums, Array4d<int>& up_col_dist_sums) const
|
||||
{
|
||||
int ay = border_size_ + i;
|
||||
int ax = border_size_ + j + template_window_half_size_;
|
||||
@@ -353,10 +338,12 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForElementInFirstRo
|
||||
|
||||
int new_last_col_num = first_col_num;
|
||||
|
||||
for (int d = 0; d < temporal_window_size_; d++) {
|
||||
for (int d = 0; d < temporal_window_size_; d++)
|
||||
{
|
||||
Mat cur_extended_src = extended_srcs_[d];
|
||||
for (int y = 0; y < search_window_size_; y++) {
|
||||
for (int x = 0; x < search_window_size_; x++) {
|
||||
for (int y = 0; y < search_window_size_; y++)
|
||||
for (int x = 0; x < search_window_size_; x++)
|
||||
{
|
||||
dist_sums[d][y][x] -= col_dist_sums[first_col_num][d][y][x];
|
||||
|
||||
col_dist_sums[new_last_col_num][d][y][x] = 0;
|
||||
@@ -364,19 +351,17 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForElementInFirstRo
|
||||
int bx = start_bx + x;
|
||||
|
||||
int* col_dist_sums_ptr = &col_dist_sums[new_last_col_num][d][y][x];
|
||||
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) {
|
||||
*col_dist_sums_ptr +=
|
||||
calcDist<T>(
|
||||
main_extended_src_.at<T>(ay + ty, ax),
|
||||
cur_extended_src.at<T>(by + ty, bx)
|
||||
);
|
||||
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
|
||||
{
|
||||
*col_dist_sums_ptr += calcDist<T>(
|
||||
main_extended_src_.at<T>(ay + ty, ax),
|
||||
cur_extended_src.at<T>(by + ty, bx));
|
||||
}
|
||||
|
||||
dist_sums[d][y][x] += col_dist_sums[new_last_col_num][d][y][x];
|
||||
|
||||
up_col_dist_sums[j][d][y][x] = col_dist_sums[new_last_col_num][d][y][x];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
291
modules/photo/src/opencl/nlmeans.cl
Normal file
291
modules/photo/src/opencl/nlmeans.cl
Normal file
@@ -0,0 +1,291 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
#ifdef cl_amd_printf
|
||||
#pragma OPENCL_EXTENSION cl_amd_printf:enable
|
||||
#endif
|
||||
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef OP_CALC_WEIGHTS
|
||||
|
||||
__kernel void calcAlmostDist2Weight(__global int * almostDist2Weight, int almostMaxDist,
|
||||
FT almostDist2ActualDistMultiplier, int fixedPointMult,
|
||||
FT den, FT WEIGHT_THRESHOLD)
|
||||
{
|
||||
int almostDist = get_global_id(0);
|
||||
|
||||
if (almostDist < almostMaxDist)
|
||||
{
|
||||
FT dist = almostDist * almostDist2ActualDistMultiplier;
|
||||
int weight = convert_int_sat_rte(fixedPointMult * exp(-dist * den));
|
||||
|
||||
if (weight < WEIGHT_THRESHOLD * fixedPointMult)
|
||||
weight = 0;
|
||||
|
||||
almostDist2Weight[almostDist] = weight;
|
||||
}
|
||||
}
|
||||
|
||||
#elif defined OP_CALC_FASTNLMEANS
|
||||
|
||||
#define noconvert
|
||||
|
||||
#define SEARCH_SIZE_SQ (SEARCH_SIZE * SEARCH_SIZE)
|
||||
|
||||
inline int calcDist(uchar_t a, uchar_t b)
|
||||
{
|
||||
int_t diff = convert_int_t(a) - convert_int_t(b);
|
||||
int_t retval = diff * diff;
|
||||
|
||||
#if cn == 1
|
||||
return retval;
|
||||
#elif cn == 2
|
||||
return retval.x + retval.y;
|
||||
#else
|
||||
#error "cn should be either 1 or 2"
|
||||
#endif
|
||||
}
|
||||
|
||||
inline int calcDistUpDown(uchar_t down_value, uchar_t down_value_t, uchar_t up_value, uchar_t up_value_t)
|
||||
{
|
||||
int_t A = convert_int_t(down_value) - convert_int_t(down_value_t);
|
||||
int_t B = convert_int_t(up_value) - convert_int_t(up_value_t);
|
||||
int_t retval = (A - B) * (A + B);
|
||||
|
||||
#if cn == 1
|
||||
return retval;
|
||||
#elif cn == 2
|
||||
return retval.x + retval.y;
|
||||
#else
|
||||
#error "cn should be either 1 or 2"
|
||||
#endif
|
||||
}
|
||||
|
||||
#define COND if (x == 0 && y == 0)
|
||||
|
||||
inline void calcFirstElementInRow(__global const uchar * src, int src_step, int src_offset,
|
||||
__local int * dists, int y, int x, int id,
|
||||
__global int * col_dists, __global int * up_col_dists)
|
||||
{
|
||||
y -= TEMPLATE_SIZE2;
|
||||
int sx = x - SEARCH_SIZE2, sy = y - SEARCH_SIZE2;
|
||||
int col_dists_current_private[TEMPLATE_SIZE];
|
||||
|
||||
for (int i = id, size = SEARCH_SIZE_SQ; i < size; i += CTA_SIZE)
|
||||
{
|
||||
int dist = 0, value;
|
||||
|
||||
__global const uchar_t * src_template = (__global const uchar_t *)(src +
|
||||
mad24(sy + i / SEARCH_SIZE, src_step, mad24(cn, sx + i % SEARCH_SIZE, src_offset)));
|
||||
__global const uchar_t * src_current = (__global const uchar_t *)(src + mad24(y, src_step, mad24(cn, x, src_offset)));
|
||||
__global int * col_dists_current = col_dists + i * TEMPLATE_SIZE;
|
||||
|
||||
#pragma unroll
|
||||
for (int j = 0; j < TEMPLATE_SIZE; ++j)
|
||||
col_dists_current_private[j] = 0;
|
||||
|
||||
for (int ty = 0; ty < TEMPLATE_SIZE; ++ty)
|
||||
{
|
||||
#pragma unroll
|
||||
for (int tx = -TEMPLATE_SIZE2; tx <= TEMPLATE_SIZE2; ++tx)
|
||||
{
|
||||
value = calcDist(src_template[tx], src_current[tx]);
|
||||
|
||||
col_dists_current_private[tx + TEMPLATE_SIZE2] += value;
|
||||
dist += value;
|
||||
}
|
||||
|
||||
src_current = (__global const uchar_t *)((__global const uchar *)src_current + src_step);
|
||||
src_template = (__global const uchar_t *)((__global const uchar *)src_template + src_step);
|
||||
}
|
||||
|
||||
#pragma unroll
|
||||
for (int j = 0; j < TEMPLATE_SIZE; ++j)
|
||||
col_dists_current[j] = col_dists_current_private[j];
|
||||
|
||||
dists[i] = dist;
|
||||
up_col_dists[0 + i] = col_dists[TEMPLATE_SIZE - 1];
|
||||
}
|
||||
}
|
||||
|
||||
inline void calcElementInFirstRow(__global const uchar * src, int src_step, int src_offset,
|
||||
__local int * dists, int y, int x0, int x, int id, int first,
|
||||
__global int * col_dists, __global int * up_col_dists)
|
||||
{
|
||||
x += TEMPLATE_SIZE2;
|
||||
y -= TEMPLATE_SIZE2;
|
||||
int sx = x - SEARCH_SIZE2, sy = y - SEARCH_SIZE2;
|
||||
|
||||
for (int i = id, size = SEARCH_SIZE_SQ; i < size; i += CTA_SIZE)
|
||||
{
|
||||
__global const uchar_t * src_current = (__global const uchar_t *)(src + mad24(y, src_step, mad24(cn, x, src_offset)));
|
||||
__global const uchar_t * src_template = (__global const uchar_t *)(src +
|
||||
mad24(sy + i / SEARCH_SIZE, src_step, mad24(cn, sx + i % SEARCH_SIZE, src_offset)));
|
||||
__global int * col_dists_current = col_dists + TEMPLATE_SIZE * i;
|
||||
|
||||
int col_dist = 0;
|
||||
|
||||
#pragma unroll
|
||||
for (int ty = 0; ty < TEMPLATE_SIZE; ++ty)
|
||||
{
|
||||
col_dist += calcDist(src_current[0], src_template[0]);
|
||||
|
||||
src_current = (__global const uchar_t *)((__global const uchar *)src_current + src_step);
|
||||
src_template = (__global const uchar_t *)((__global const uchar *)src_template + src_step);
|
||||
}
|
||||
|
||||
dists[i] += col_dist - col_dists_current[first];
|
||||
col_dists_current[first] = col_dist;
|
||||
up_col_dists[mad24(x0, SEARCH_SIZE_SQ, i)] = col_dist;
|
||||
}
|
||||
}
|
||||
|
||||
inline void calcElement(__global const uchar * src, int src_step, int src_offset,
|
||||
__local int * dists, int y, int x0, int x, int id, int first,
|
||||
__global int * col_dists, __global int * up_col_dists)
|
||||
{
|
||||
int sx = x + TEMPLATE_SIZE2;
|
||||
int sy_up = y - TEMPLATE_SIZE2 - 1;
|
||||
int sy_down = y + TEMPLATE_SIZE2;
|
||||
|
||||
uchar_t up_value = *(__global const uchar_t *)(src + mad24(sy_up, src_step, mad24(cn, sx, src_offset)));
|
||||
uchar_t down_value = *(__global const uchar_t *)(src + mad24(sy_down, src_step, mad24(cn, sx, src_offset)));
|
||||
|
||||
sx -= SEARCH_SIZE2;
|
||||
sy_up -= SEARCH_SIZE2;
|
||||
sy_down -= SEARCH_SIZE2;
|
||||
|
||||
for (int i = id, size = SEARCH_SIZE_SQ; i < size; i += CTA_SIZE)
|
||||
{
|
||||
int wx = i % SEARCH_SIZE, wy = i / SEARCH_SIZE;
|
||||
|
||||
uchar_t up_value_t = *(__global const uchar_t *)(src + mad24(sy_up + wy, src_step, mad24(cn, sx + wx, src_offset)));
|
||||
uchar_t down_value_t = *(__global const uchar_t *)(src + mad24(sy_down + wy, src_step, mad24(cn, sx + wx, src_offset)));
|
||||
|
||||
__global int * col_dists_current = col_dists + mad24(i, TEMPLATE_SIZE, first);
|
||||
__global int * up_col_dists_current = up_col_dists + mad24(x0, SEARCH_SIZE_SQ, i);
|
||||
|
||||
int col_dist = up_col_dists_current[0] + calcDistUpDown(down_value, down_value_t, up_value, up_value_t);
|
||||
|
||||
dists[i] += col_dist - col_dists_current[0];
|
||||
col_dists_current[0] = col_dist;
|
||||
up_col_dists_current[0] = col_dist;
|
||||
}
|
||||
}
|
||||
|
||||
inline void convolveWindow(__global const uchar * src, int src_step, int src_offset,
|
||||
__local int * dists, __global const int * almostDist2Weight,
|
||||
__global uchar * dst, int dst_step, int dst_offset,
|
||||
int y, int x, int id, __local int * weights_local,
|
||||
__local int_t * weighted_sum_local, int almostTemplateWindowSizeSqBinShift)
|
||||
{
|
||||
int sx = x - SEARCH_SIZE2, sy = y - SEARCH_SIZE2, weights = 0;
|
||||
int_t weighted_sum = (int_t)(0);
|
||||
|
||||
for (int i = id, size = SEARCH_SIZE_SQ; i < size; i += CTA_SIZE)
|
||||
{
|
||||
int src_index = mad24(sy + i / SEARCH_SIZE, src_step, mad24(i % SEARCH_SIZE + sx, cn, src_offset));
|
||||
int_t src_value = convert_int_t(*(__global const uchar_t *)(src + src_index));
|
||||
|
||||
int almostAvgDist = dists[i] >> almostTemplateWindowSizeSqBinShift;
|
||||
int weight = almostDist2Weight[almostAvgDist];
|
||||
|
||||
weights += weight;
|
||||
weighted_sum += (int_t)(weight) * src_value;
|
||||
}
|
||||
|
||||
if (id >= CTA_SIZE2)
|
||||
{
|
||||
int id2 = id - CTA_SIZE2;
|
||||
weights_local[id2] = weights;
|
||||
weighted_sum_local[id2] = weighted_sum;
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (id < CTA_SIZE2)
|
||||
{
|
||||
weights_local[id] += weights;
|
||||
weighted_sum_local[id] += weighted_sum;
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
for (int lsize = CTA_SIZE2 >> 1; lsize > 2; lsize >>= 1)
|
||||
{
|
||||
if (id < lsize)
|
||||
{
|
||||
int id2 = lsize + id;
|
||||
weights_local[id] += weights_local[id2];
|
||||
weighted_sum_local[id] += weighted_sum_local[id2];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
if (id == 0)
|
||||
{
|
||||
int dst_index = mad24(y, dst_step, mad24(cn, x, dst_offset));
|
||||
int_t weighted_sum_local_0 = weighted_sum_local[0] + weighted_sum_local[1] +
|
||||
weighted_sum_local[2] + weighted_sum_local[3];
|
||||
int weights_local_0 = weights_local[0] + weights_local[1] + weights_local[2] + weights_local[3];
|
||||
|
||||
*(__global uchar_t *)(dst + dst_index) = convert_uchar_t(weighted_sum_local_0 / (int_t)(weights_local_0));
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void fastNlMeansDenoising(__global const uchar * src, int src_step, int src_offset,
|
||||
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
|
||||
__global const int * almostDist2Weight, __global uchar * buffer,
|
||||
int almostTemplateWindowSizeSqBinShift)
|
||||
{
|
||||
int block_x = get_group_id(0), nblocks_x = get_num_groups(0);
|
||||
int block_y = get_group_id(1);
|
||||
int id = get_local_id(0), first;
|
||||
|
||||
__local int dists[SEARCH_SIZE_SQ], weights[CTA_SIZE2];
|
||||
__local int_t weighted_sum[CTA_SIZE2];
|
||||
|
||||
int x0 = block_x * BLOCK_COLS, x1 = min(x0 + BLOCK_COLS, dst_cols);
|
||||
int y0 = block_y * BLOCK_ROWS, y1 = min(y0 + BLOCK_ROWS, dst_rows);
|
||||
|
||||
// for each group we need SEARCH_SIZE_SQ * TEMPLATE_SIZE integer buffer for storing part column sum for current element
|
||||
// and SEARCH_SIZE_SQ * BLOCK_COLS integer buffer for storing last column sum for each element of search window of up row
|
||||
int block_data_start = SEARCH_SIZE_SQ * (mad24(block_y, dst_cols, x0) + mad24(block_y, nblocks_x, block_x) * TEMPLATE_SIZE);
|
||||
__global int * col_dists = (__global int *)(buffer + block_data_start * sizeof(int));
|
||||
__global int * up_col_dists = col_dists + SEARCH_SIZE_SQ * TEMPLATE_SIZE;
|
||||
|
||||
for (int y = y0; y < y1; ++y)
|
||||
for (int x = x0; x < x1; ++x)
|
||||
{
|
||||
if (x == x0)
|
||||
{
|
||||
calcFirstElementInRow(src, src_step, src_offset, dists, y, x, id, col_dists, up_col_dists);
|
||||
first = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (y == y0)
|
||||
calcElementInFirstRow(src, src_step, src_offset, dists, y, x - x0, x, id, first, col_dists, up_col_dists);
|
||||
else
|
||||
calcElement(src, src_step, src_offset, dists, y, x - x0, x, id, first, col_dists, up_col_dists);
|
||||
|
||||
first = (first + 1) % TEMPLATE_SIZE;
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
convolveWindow(src, src_step, src_offset, dists, almostDist2Weight, dst, dst_step, dst_offset,
|
||||
y, x, id, weights, weighted_sum, almostTemplateWindowSizeSqBinShift);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
@@ -46,6 +46,8 @@
|
||||
#include "opencv2/core/private.hpp"
|
||||
#include "opencv2/core/utility.hpp"
|
||||
#include "opencv2/photo.hpp"
|
||||
#include "opencv2/core/ocl.hpp"
|
||||
#include "opencv2/imgproc.hpp"
|
||||
|
||||
#ifdef HAVE_TEGRA_OPTIMIZATION
|
||||
#include "opencv2/photo/photo_tegra.hpp"
|
||||
|
95
modules/photo/test/ocl/test_denoising.cpp
Normal file
95
modules/photo/test/ocl/test_denoising.cpp
Normal file
@@ -0,0 +1,95 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
#include "test_precomp.hpp"
|
||||
#include "opencv2/ts/ocl_test.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
namespace cvtest {
|
||||
namespace ocl {
|
||||
|
||||
PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool)
|
||||
{
|
||||
int cn, templateWindowSize, searchWindowSize;
|
||||
float h;
|
||||
bool use_roi;
|
||||
|
||||
TEST_DECLARE_INPUT_PARAMETER(src)
|
||||
TEST_DECLARE_OUTPUT_PARAMETER(dst)
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
cn = GET_PARAM(0);
|
||||
use_roi = GET_PARAM(1);
|
||||
|
||||
templateWindowSize = 7;
|
||||
searchWindowSize = 21;
|
||||
h = 3.0f;
|
||||
}
|
||||
|
||||
virtual void generateTestData()
|
||||
{
|
||||
Mat image;
|
||||
if (cn == 1)
|
||||
{
|
||||
image = readImage("denoising/lena_noised_gaussian_sigma=10.png", IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(image.empty());
|
||||
}
|
||||
|
||||
const int type = CV_8UC(cn);
|
||||
|
||||
Size roiSize = cn == 1 ? image.size() : randomSize(1, MAX_VALUE);
|
||||
Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
||||
randomSubMat(src, src_roi, roiSize, srcBorder, type, 0, 255);
|
||||
if (cn == 1)
|
||||
image.copyTo(src_roi);
|
||||
|
||||
Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
||||
randomSubMat(dst, dst_roi, roiSize, dstBorder, type, 0, 255);
|
||||
|
||||
UMAT_UPLOAD_INPUT_PARAMETER(src)
|
||||
UMAT_UPLOAD_OUTPUT_PARAMETER(dst)
|
||||
}
|
||||
};
|
||||
|
||||
typedef FastNlMeansDenoisingTestBase FastNlMeansDenoising;
|
||||
|
||||
OCL_TEST_P(FastNlMeansDenoising, Mat)
|
||||
{
|
||||
for (int j = 0; j < test_loop_times; j++)
|
||||
{
|
||||
generateTestData();
|
||||
|
||||
OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, h, templateWindowSize, searchWindowSize));
|
||||
OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, h, templateWindowSize, searchWindowSize));
|
||||
|
||||
OCL_EXPECT_MATS_NEAR(dst, 1)
|
||||
}
|
||||
}
|
||||
|
||||
typedef FastNlMeansDenoisingTestBase fastNlMeansDenoisingColored;
|
||||
|
||||
OCL_TEST_P(fastNlMeansDenoisingColored, Mat)
|
||||
{
|
||||
for (int j = 0; j < test_loop_times; j++)
|
||||
{
|
||||
generateTestData();
|
||||
|
||||
OCL_OFF(cv::fastNlMeansDenoisingColored(src_roi, dst_roi, h, h, templateWindowSize, searchWindowSize));
|
||||
OCL_ON(cv::fastNlMeansDenoisingColored(usrc_roi, udst_roi, h, h, templateWindowSize, searchWindowSize));
|
||||
|
||||
OCL_EXPECT_MATS_NEAR(dst, 1)
|
||||
}
|
||||
}
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising, Combine(Values(1, 2), Bool()));
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Photo, fastNlMeansDenoisingColored, Combine(Values(Channels(3)), Bool()));
|
||||
|
||||
} } // namespace cvtest::ocl
|
||||
|
||||
#endif // HAVE_OPENCL
|
Reference in New Issue
Block a user