From 72c327fef882421d6403b00e66397b29efeae801 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 18 Feb 2014 19:23:38 +0400 Subject: [PATCH] hope it finally works --- .../src/fast_nlmeans_denoising_opencl.hpp | 19 +++++++++++------ modules/photo/src/opencl/nlmeans.cl | 21 +++++++++++++++---- modules/photo/test/ocl/test_denoising.cpp | 4 ---- 3 files changed, 30 insertions(+), 14 deletions(-) diff --git a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp index cd28c1489..eab98998f 100644 --- a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp @@ -35,28 +35,35 @@ static int divUp(int a, int b) return (a + b - 1) / b; } -static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindowSize, int templateWindowSize, float h, int cn, +template <typename FT> +static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindowSize, int templateWindowSize, FT h, int cn, int & almostTemplateWindowSizeSqBinShift) { const int maxEstimateSumValue = searchWindowSize * searchWindowSize * 255; int fixedPointMult = std::numeric_limits<int>::max() / maxEstimateSumValue; + int depth = DataType<FT>::depth; + bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + + if (depth == CV_64F && !doubleSupport) + return false; // precalc weight for every possible l2 dist between blocks // additional optimization of precalced weights to replace division(averaging) by binary shift CV_Assert(templateWindowSize <= 46340); // sqrt(INT_MAX) int templateWindowSizeSq = templateWindowSize * templateWindowSize; almostTemplateWindowSizeSqBinShift = getNearestPowerOf2(templateWindowSizeSq); - float almostDist2ActualDistMultiplier = (float)(1 << almostTemplateWindowSizeSqBinShift) / templateWindowSizeSq; + FT almostDist2ActualDistMultiplier = (FT)(1 << almostTemplateWindowSizeSqBinShift) / templateWindowSizeSq; - const float WEIGHT_THRESHOLD = 1e-3f; + const FT 
WEIGHT_THRESHOLD = 1e-3f; int maxDist = 255 * 255 * cn; int almostMaxDist = (int)(maxDist / almostDist2ActualDistMultiplier + 1); - float den = 1.0f / (h * h * cn); + FT den = 1.0f / (h * h * cn); almostDist2Weight.create(1, almostMaxDist, CV_32SC1); ocl::Kernel k("calcAlmostDist2Weight", ocl::photo::nlmeans_oclsrc, - "-D OP_CALC_WEIGHTS"); + format("-D OP_CALC_WEIGHTS -D FT=%s%s", ocl::typeToStr(depth), + doubleSupport ? " -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; @@ -99,7 +106,7 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, return false; UMat almostDist2Weight; - if (!ocl_calcAlmostDist2Weight(almostDist2Weight, searchWindowSize, templateWindowSize, h, cn, + if (!ocl_calcAlmostDist2Weight<float>(almostDist2Weight, searchWindowSize, templateWindowSize, h, cn, almostTemplateWindowSizeSqBinShift)) return false; CV_Assert(almostTemplateWindowSizeSqBinShift >= 0); diff --git a/modules/photo/src/opencl/nlmeans.cl b/modules/photo/src/opencl/nlmeans.cl index f038e6ce1..ae1c3090b 100644 --- a/modules/photo/src/opencl/nlmeans.cl +++ b/modules/photo/src/opencl/nlmeans.cl @@ -5,25 +5,38 @@ // Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. 
+#ifdef cl_amd_printf #pragma OPENCL_EXTENSION cl_amd_printf:enable +#endif + +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#endif +#endif + #ifdef OP_CALC_WEIGHTS __kernel void calcAlmostDist2Weight(__global int * almostDist2Weight, int almostMaxDist, - float almostDist2ActualDistMultiplier, int fixedPointMult, - float den, float WEIGHT_THRESHOLD) + FT almostDist2ActualDistMultiplier, int fixedPointMult, + FT den, FT WEIGHT_THRESHOLD) { int almostDist = get_global_id(0); if (almostDist < almostMaxDist) { - float dist = almostDist * almostDist2ActualDistMultiplier; + FT dist = almostDist * almostDist2ActualDistMultiplier; int weight = convert_int_sat_rte(fixedPointMult * exp(-dist * den)); if (weight < WEIGHT_THRESHOLD * fixedPointMult) weight = 0; almostDist2Weight[almostDist] = weight; + +// printf("%d ", weight); } } @@ -193,7 +206,7 @@ inline void convolveWindow(__global const uchar * src, int src_step, int src_off if (id == 0) { int dst_index = mad24(y, dst_step, mad24(cn, x, dst_offset)); - *(__global uchar_t *)(dst + dst_index) = convert_uchar_t(weighted_sum_local[0]); + *(__global uchar_t *)(dst + dst_index) = convert_uchar_t(weighted_sum_local[0] / weights_local[0]); } } diff --git a/modules/photo/test/ocl/test_denoising.cpp b/modules/photo/test/ocl/test_denoising.cpp index 721cdfc55..9f504d0a0 100644 --- a/modules/photo/test/ocl/test_denoising.cpp +++ b/modules/photo/test/ocl/test_denoising.cpp @@ -94,10 +94,6 @@ OCL_TEST_P(FastNlMeansDenoising, Mat) OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, h, templateWindowSize, searchWindowSize)); OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, h, templateWindowSize, searchWindowSize)); -// Mat difference; -// cv::subtract(dst_roi, udst_roi, difference); -// print(difference); - OCL_EXPECT_MATS_NEAR(dst, 1) } }