// opencv/modules/photo/src/fast_nlmeans_denoising_opencl.hpp

// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
#ifndef __OPENCV_FAST_NLMEANS_DENOISING_OPENCL_HPP__
#define __OPENCV_FAST_NLMEANS_DENOISING_OPENCL_HPP__
#include "precomp.hpp"
#define CV_OPENCL_RUN_ASSERT
#include "opencl_kernels.hpp"
namespace cv {
// Work-partitioning constants. ocl_fastNlMeansDenoising forwards them to the OpenCL
// program as -D BLOCK_ROWS / -D BLOCK_COLS / -D CTA_SIZE build options and uses them
// to derive the block counts and the global/local work sizes of the kernel launch.
enum
{
BLOCK_ROWS = 32, // image rows per block (divisor used for nblocksy)
BLOCK_COLS = 128, // image columns per block (divisor used for nblocksx)
CTA_SIZE = 128 // local work size along dimension 0
};
// Returns the smallest exponent p >= 0 such that 2^p >= value.
// Note that the result is the exponent, not the power of two itself.
// Any value <= 1 (including zero and negatives) yields 0.
//
// The shift is evaluated in unsigned arithmetic: for value > 2^30 the answer
// is 31, and computing 1 << 31 in int would be signed overflow (undefined
// behaviour, which in practice could keep the loop from ever terminating).
static inline int getNearestPowerOf2(int value)
{
    if (value <= 1)
        return 0;

    const unsigned int target = (unsigned int)value;
    int p = 0;
    // 1u << 31 == 2^31 exceeds every positive int, so p never goes past 31.
    while ((1u << p) < target)
        ++p;
    return p;
}
// Divides a by b after biasing the numerator by (b - 1); for non-negative a and
// positive b this is the quotient rounded up, i.e. the number of b-sized chunks
// needed to cover a elements.
static int divUp(int a, int b)
{
    const int biasedNumerator = a + b - 1;
    return biasedNumerator / b;
}
2014-02-18 19:23:38 +04:00
template <typename FT>
static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindowSize, int templateWindowSize, FT h, int cn,
int & almostTemplateWindowSizeSqBinShift)
{
const int maxEstimateSumValue = searchWindowSize * searchWindowSize * 255;
int fixedPointMult = std::numeric_limits<int>::max() / maxEstimateSumValue;
2014-02-18 19:23:38 +04:00
int depth = DataType<FT>::depth;
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
if (depth == CV_64F && !doubleSupport)
return false;
// precalc weight for every possible l2 dist between blocks
// additional optimization of precalced weights to replace division(averaging) by binary shift
CV_Assert(templateWindowSize <= 46340); // sqrt(INT_MAX)
int templateWindowSizeSq = templateWindowSize * templateWindowSize;
almostTemplateWindowSizeSqBinShift = getNearestPowerOf2(templateWindowSizeSq);
2014-02-18 19:23:38 +04:00
FT almostDist2ActualDistMultiplier = (FT)(1 << almostTemplateWindowSizeSqBinShift) / templateWindowSizeSq;
2014-02-18 19:23:38 +04:00
const FT WEIGHT_THRESHOLD = 1e-3f;
int maxDist = 255 * 255 * cn;
int almostMaxDist = (int)(maxDist / almostDist2ActualDistMultiplier + 1);
2014-02-18 19:23:38 +04:00
FT den = 1.0f / (h * h * cn);
almostDist2Weight.create(1, almostMaxDist, CV_32SC1);
ocl::Kernel k("calcAlmostDist2Weight", ocl::photo::nlmeans_oclsrc,
2014-02-18 19:23:38 +04:00
format("-D OP_CALC_WEIGHTS -D FT=%s%s", ocl::typeToStr(depth),
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
if (k.empty())
return false;
k.args(ocl::KernelArg::PtrWriteOnly(almostDist2Weight), almostMaxDist,
almostDist2ActualDistMultiplier, fixedPointMult, den, WEIGHT_THRESHOLD);
size_t globalsize[1] = { almostMaxDist };
return k.run(1, globalsize, NULL, false);
}
// OpenCL path of fast non-local-means denoising.
//
// _src               - input image; only CV_8U with 1, 2 or 4 channels is handled.
// _dst               - output image; created with the size and type of _src.
// h                  - filter strength, forwarded to the weight-table computation.
// templateWindowSize - side of the template window; normalised to 2 * (size / 2) + 1.
// searchWindowSize   - side of the search window; normalised the same way.
//
// Returns false when the input type is not handled, when either kernel cannot be
// created, when the weight table cannot be computed, or when the kernel launch
// fails. Otherwise returns the result of running the "fastNlMeansDenoising" kernel.
static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
                                     int templateWindowSize, int searchWindowSize)
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    Size size = _src.size();

    if ( !(depth == CV_8U && cn <= 4 && cn != 3) )
        return false;

    // Work with half sizes and rebuild the window sides from them so both are odd.
    int templateWindowHalfSize = templateWindowSize / 2;
    int searchWindowHalfSize = searchWindowSize / 2;
    templateWindowSize = templateWindowHalfSize * 2 + 1;
    searchWindowSize = searchWindowHalfSize * 2 + 1;

    int nblocksx = divUp(size.width, BLOCK_COLS), nblocksy = divUp(size.height, BLOCK_ROWS);
    int almostTemplateWindowSizeSqBinShift = -1;

    char cvt[2][40];
    String opts = format("-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d"
                         " -D uchar_t=%s -D int_t=%s -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
                         " -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d"
                         " -D convert_int_t=%s -D cn=%d -D CTA_SIZE2=%d -D convert_uchar_t=%s",
                         templateWindowSize, searchWindowSize, ocl::typeToStr(type),
                         ocl::typeToStr(CV_32SC(cn)), BLOCK_COLS, BLOCK_ROWS, CTA_SIZE,
                         templateWindowHalfSize, searchWindowHalfSize,
                         ocl::convertTypeStr(CV_8U, CV_32S, cn, cvt[0]), cn,
                         CTA_SIZE >> 1, ocl::convertTypeStr(CV_32S, CV_8U, cn, cvt[1]));

    ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts);
    if (k.empty())
        return false;

    UMat almostDist2Weight;
    if (!ocl_calcAlmostDist2Weight<float>(almostDist2Weight, searchWindowSize, templateWindowSize, h, cn,
                                         almostTemplateWindowSizeSqBinShift))
        return false;
    CV_Assert(almostTemplateWindowSizeSqBinShift >= 0);

    // Pad the source by the combined half sizes of both windows on every side.
    UMat srcex;
    int borderSize = searchWindowHalfSize + templateWindowHalfSize;
    copyMakeBorder(_src, srcex, borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT);

    _dst.create(size, type);
    UMat dst = _dst.getUMat();

    // Scratch buffer handed to the kernel, sized as the sum of the "upColSum" and
    // "colSum" areas.
    int searchWindowSizeSq = searchWindowSize * searchWindowSize;
    Size upColSumSize(size.width, searchWindowSizeSq * nblocksy);
    Size colSumSize(nblocksx * templateWindowSize, searchWindowSizeSq * nblocksy);
    UMat buffer(upColSumSize + colSumSize, CV_32SC(cn));

    // Narrow srcex to the original image area inside the padded image; the kernel
    // receives it without size information (presumably so that it can address the
    // padding around the ROI - confirm against the kernel source).
    srcex = srcex(Rect(Point(borderSize, borderSize), size));
    k.args(ocl::KernelArg::ReadOnlyNoSize(srcex), ocl::KernelArg::WriteOnly(dst),
           ocl::KernelArg::PtrReadOnly(almostDist2Weight),
           ocl::KernelArg::PtrReadOnly(buffer), almostTemplateWindowSizeSqBinShift);

    // Explicit casts avoid int -> size_t narrowing inside the braced initialisers.
    size_t globalsize[2] = { (size_t)nblocksx * BLOCK_COLS, (size_t)nblocksy * BLOCK_ROWS },
           localsize[2] = { CTA_SIZE, 1 };
    return k.run(2, globalsize, localsize, false);
}
}
#endif