hope it finally works
This commit is contained in:
parent
38cbe9873a
commit
72c327fef8
@ -35,28 +35,35 @@ static int divUp(int a, int b)
|
||||
return (a + b - 1) / b;
|
||||
}
|
||||
|
||||
static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindowSize, int templateWindowSize, float h, int cn,
|
||||
template <typename FT>
|
||||
static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindowSize, int templateWindowSize, FT h, int cn,
|
||||
int & almostTemplateWindowSizeSqBinShift)
|
||||
{
|
||||
const int maxEstimateSumValue = searchWindowSize * searchWindowSize * 255;
|
||||
int fixedPointMult = std::numeric_limits<int>::max() / maxEstimateSumValue;
|
||||
int depth = DataType<FT>::depth;
|
||||
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
|
||||
|
||||
if (depth == CV_64F && !doubleSupport)
|
||||
return false;
|
||||
|
||||
// precalc weight for every possible l2 dist between blocks
|
||||
// additional optimization of precalced weights to replace division(averaging) by binary shift
|
||||
CV_Assert(templateWindowSize <= 46340); // sqrt(INT_MAX)
|
||||
int templateWindowSizeSq = templateWindowSize * templateWindowSize;
|
||||
almostTemplateWindowSizeSqBinShift = getNearestPowerOf2(templateWindowSizeSq);
|
||||
float almostDist2ActualDistMultiplier = (float)(1 << almostTemplateWindowSizeSqBinShift) / templateWindowSizeSq;
|
||||
FT almostDist2ActualDistMultiplier = (FT)(1 << almostTemplateWindowSizeSqBinShift) / templateWindowSizeSq;
|
||||
|
||||
const float WEIGHT_THRESHOLD = 1e-3f;
|
||||
const FT WEIGHT_THRESHOLD = 1e-3f;
|
||||
int maxDist = 255 * 255 * cn;
|
||||
int almostMaxDist = (int)(maxDist / almostDist2ActualDistMultiplier + 1);
|
||||
float den = 1.0f / (h * h * cn);
|
||||
FT den = 1.0f / (h * h * cn);
|
||||
|
||||
almostDist2Weight.create(1, almostMaxDist, CV_32SC1);
|
||||
|
||||
ocl::Kernel k("calcAlmostDist2Weight", ocl::photo::nlmeans_oclsrc,
|
||||
"-D OP_CALC_WEIGHTS");
|
||||
format("-D OP_CALC_WEIGHTS -D FT=%s%s", ocl::typeToStr(depth),
|
||||
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
|
||||
if (k.empty())
|
||||
return false;
|
||||
|
||||
@ -99,7 +106,7 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
|
||||
return false;
|
||||
|
||||
UMat almostDist2Weight;
|
||||
if (!ocl_calcAlmostDist2Weight(almostDist2Weight, searchWindowSize, templateWindowSize, h, cn,
|
||||
if (!ocl_calcAlmostDist2Weight<float>(almostDist2Weight, searchWindowSize, templateWindowSize, h, cn,
|
||||
almostTemplateWindowSizeSqBinShift))
|
||||
return false;
|
||||
CV_Assert(almostTemplateWindowSizeSqBinShift >= 0);
|
||||
|
@ -5,25 +5,38 @@
|
||||
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
#ifdef cl_amd_printf
|
||||
// The directive keyword is two tokens, "OPENCL EXTENSION"; the underscore spelling is an unknown pragma and is ignored.
#pragma OPENCL EXTENSION cl_amd_printf:enable
|
||||
#endif
|
||||
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef OP_CALC_WEIGHTS
|
||||
|
||||
// Fills the weight table: the work-item with global id i writes almostDist2Weight[i].
//   dist   = i * almostDist2ActualDistMultiplier
//   weight = convert_int_sat_rte(fixedPointMult * exp(-dist * den))
// A weight below WEIGHT_THRESHOLD * fixedPointMult is stored as 0.
// Work-items whose id is >= almostMaxDist write nothing.
// NOTE(review): this listing shows both sides of a diff without +/- markers; the
// `float`-typed and `FT`-typed lines declare the same parameters / local variable
// (before and after). FT is presumably a build-time macro (-D FT=...) - confirm
// against the host code that compiles this kernel.
__kernel void calcAlmostDist2Weight(__global int * almostDist2Weight, int almostMaxDist,
|
||||
float almostDist2ActualDistMultiplier, int fixedPointMult,
|
||||
float den, float WEIGHT_THRESHOLD)
|
||||
FT almostDist2ActualDistMultiplier, int fixedPointMult,
|
||||
FT den, FT WEIGHT_THRESHOLD)
|
||||
{
|
||||
int almostDist = get_global_id(0);
|
||||
|
||||
// bounds guard: only ids below almostMaxDist write to the table
if (almostDist < almostMaxDist)
|
||||
{
|
||||
float dist = almostDist * almostDist2ActualDistMultiplier;
|
||||
FT dist = almostDist * almostDist2ActualDistMultiplier;
|
||||
// scale exp(-dist * den) by fixedPointMult, then convert to int with saturation and round-to-nearest-even
int weight = convert_int_sat_rte(fixedPointMult * exp(-dist * den));
|
||||
|
||||
// zero out weights smaller than the WEIGHT_THRESHOLD fraction of fixedPointMult
if (weight < WEIGHT_THRESHOLD * fixedPointMult)
|
||||
weight = 0;
|
||||
|
||||
almostDist2Weight[almostDist] = weight;
|
||||
|
||||
// printf("%d ", weight);
|
||||
}
|
||||
}
|
||||
|
||||
@ -193,7 +206,7 @@ inline void convolveWindow(__global const uchar * src, int src_step, int src_off
|
||||
if (id == 0)
|
||||
{
|
||||
int dst_index = mad24(y, dst_step, mad24(cn, x, dst_offset));
|
||||
*(__global uchar_t *)(dst + dst_index) = convert_uchar_t(weighted_sum_local[0]);
|
||||
*(__global uchar_t *)(dst + dst_index) = convert_uchar_t(weighted_sum_local[0] / weights_local[0]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -94,10 +94,6 @@ OCL_TEST_P(FastNlMeansDenoising, Mat)
|
||||
OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, h, templateWindowSize, searchWindowSize));
|
||||
OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, h, templateWindowSize, searchWindowSize));
|
||||
|
||||
// Mat difference;
|
||||
// cv::subtract(dst_roi, udst_roi, difference);
|
||||
// print(difference);
|
||||
|
||||
OCL_EXPECT_MATS_NEAR(dst, 1)
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user