hope it finally works

This commit is contained in:
Ilya Lavrenov 2014-02-18 19:23:38 +04:00
parent 38cbe9873a
commit 72c327fef8
3 changed files with 30 additions and 14 deletions

View File

@ -35,28 +35,35 @@ static int divUp(int a, int b)
return (a + b - 1) / b;
}
static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindowSize, int templateWindowSize, float h, int cn,
template <typename FT>
static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindowSize, int templateWindowSize, FT h, int cn,
int & almostTemplateWindowSizeSqBinShift)
{
const int maxEstimateSumValue = searchWindowSize * searchWindowSize * 255;
int fixedPointMult = std::numeric_limits<int>::max() / maxEstimateSumValue;
int depth = DataType<FT>::depth;
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
if (depth == CV_64F && !doubleSupport)
return false;
// precalc weight for every possible l2 dist between blocks
// additional optimization of precalced weights to replace division(averaging) by binary shift
CV_Assert(templateWindowSize <= 46340); // sqrt(INT_MAX)
int templateWindowSizeSq = templateWindowSize * templateWindowSize;
almostTemplateWindowSizeSqBinShift = getNearestPowerOf2(templateWindowSizeSq);
float almostDist2ActualDistMultiplier = (float)(1 << almostTemplateWindowSizeSqBinShift) / templateWindowSizeSq;
FT almostDist2ActualDistMultiplier = (FT)(1 << almostTemplateWindowSizeSqBinShift) / templateWindowSizeSq;
const float WEIGHT_THRESHOLD = 1e-3f;
const FT WEIGHT_THRESHOLD = 1e-3f;
int maxDist = 255 * 255 * cn;
int almostMaxDist = (int)(maxDist / almostDist2ActualDistMultiplier + 1);
float den = 1.0f / (h * h * cn);
FT den = 1.0f / (h * h * cn);
almostDist2Weight.create(1, almostMaxDist, CV_32SC1);
ocl::Kernel k("calcAlmostDist2Weight", ocl::photo::nlmeans_oclsrc,
"-D OP_CALC_WEIGHTS");
format("-D OP_CALC_WEIGHTS -D FT=%s%s", ocl::typeToStr(depth),
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
if (k.empty())
return false;
@ -99,7 +106,7 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
return false;
UMat almostDist2Weight;
if (!ocl_calcAlmostDist2Weight(almostDist2Weight, searchWindowSize, templateWindowSize, h, cn,
if (!ocl_calcAlmostDist2Weight<float>(almostDist2Weight, searchWindowSize, templateWindowSize, h, cn,
almostTemplateWindowSizeSqBinShift))
return false;
CV_Assert(almostTemplateWindowSizeSqBinShift >= 0);

View File

@ -5,25 +5,38 @@
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// Enable AMD's printf extension when the compiler advertises it, so that
// printf-based debugging can be used inside kernels.
// The directive is spelled "OPENCL EXTENSION" (two tokens); the previous
// "OPENCL_EXTENSION" spelling does not match that form, so it did not
// request the extension.
#ifdef cl_amd_printf
#pragma OPENCL EXTENSION cl_amd_printf:enable
#endif
// Enable double-precision support only when DOUBLE_SUPPORT is defined at
// build time, preferring the AMD extension and falling back to the Khronos one.
#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
#ifdef OP_CALC_WEIGHTS
// Fills the weight lookup table, one work-item per table entry.
//   almostDist2Weight               - output table of int weights, indexed by the work-item id
//   almostMaxDist                   - number of entries to fill; ids at or beyond it write nothing
//   almostDist2ActualDistMultiplier - scale applied to the index to obtain dist
//   fixedPointMult                  - integer scale applied to the exponential before rounding
//   den                             - factor multiplying -dist inside exp()
//   WEIGHT_THRESHOLD                - weights below WEIGHT_THRESHOLD * fixedPointMult are stored as 0
// NOTE(review): the float-typed and FT-typed lines below look like the before/after
// pair of one change shown together — confirm which pair the file actually keeps.
__kernel void calcAlmostDist2Weight(__global int * almostDist2Weight, int almostMaxDist,
float almostDist2ActualDistMultiplier, int fixedPointMult,
float den, float WEIGHT_THRESHOLD)
FT almostDist2ActualDistMultiplier, int fixedPointMult,
FT den, FT WEIGHT_THRESHOLD)
{
// Table index handled by this work-item.
int almostDist = get_global_id(0);
// Guard: the launch may contain more work-items than table entries.
if (almostDist < almostMaxDist)
{
float dist = almostDist * almostDist2ActualDistMultiplier;
FT dist = almostDist * almostDist2ActualDistMultiplier;
// Scale exp(-dist * den) by fixedPointMult and convert to int with
// saturation and round-to-nearest-even.
int weight = convert_int_sat_rte(fixedPointMult * exp(-dist * den));
// Drop weights that fall below the scaled threshold.
if (weight < WEIGHT_THRESHOLD * fixedPointMult)
weight = 0;
almostDist2Weight[almostDist] = weight;
// printf("%d ", weight);
}
}
@ -193,7 +206,7 @@ inline void convolveWindow(__global const uchar * src, int src_step, int src_off
if (id == 0)
{
int dst_index = mad24(y, dst_step, mad24(cn, x, dst_offset));
*(__global uchar_t *)(dst + dst_index) = convert_uchar_t(weighted_sum_local[0]);
*(__global uchar_t *)(dst + dst_index) = convert_uchar_t(weighted_sum_local[0] / weights_local[0]);
}
}

View File

@ -94,10 +94,6 @@ OCL_TEST_P(FastNlMeansDenoising, Mat)
OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, h, templateWindowSize, searchWindowSize));
OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, h, templateWindowSize, searchWindowSize));
// Mat difference;
// cv::subtract(dst_roi, udst_roi, difference);
// print(difference);
OCL_EXPECT_MATS_NEAR(dst, 1)
}
}