From 7afbae57bab989e4b91fe4592fccc69604c638f8 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Sun, 27 Oct 2013 15:55:40 +0400 Subject: [PATCH] fixed ocl::minMax for FP-types --- modules/ocl/src/arithm.cpp | 12 +- modules/ocl/src/opencl/arithm_minMax.cl | 17 +- modules/ocl/src/opencl/arithm_minMax_mask.cl | 196 ------------------- modules/ocl/test/test_arithm.cpp | 45 ++++- 4 files changed, 56 insertions(+), 214 deletions(-) delete mode 100644 modules/ocl/src/opencl/arithm_minMax_mask.cl diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index c0328e16b..c28b1ea04 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -474,10 +474,14 @@ static void arithmetic_minMax_run(const oclMat &src, const oclMat & mask, cl_mem ostringstream stream; stream << "-D T=" << typeMap[src.depth()] << channelMap[src.channels()]; - stream << " -D MAX_VAL=" << (WT)numeric_limits::max(); - stream << " -D MIN_VAL=" << (numeric_limits::is_integer ? - (WT)numeric_limits::min() : -(WT)(std::numeric_limits::max())); - string buildOptions = stream.str(); + if (numeric_limits::is_integer) + { + stream << " -D MAX_VAL=" << (WT)numeric_limits::max(); + stream << " -D MIN_VAL=" << (WT)numeric_limits::min(); + } + else + stream << " -D DEPTH_" << src.depth(); + std::string buildOptions = stream.str(); vector > args; args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); diff --git a/modules/ocl/src/opencl/arithm_minMax.cl b/modules/ocl/src/opencl/arithm_minMax.cl index 45c8f524c..35f4cdd70 100644 --- a/modules/ocl/src/opencl/arithm_minMax.cl +++ b/modules/ocl/src/opencl/arithm_minMax.cl @@ -53,8 +53,13 @@ #endif #endif -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable +#ifdef DEPTH_5 +#define MIN_VAL (-FLT_MAX) +#define MAX_VAL FLT_MAX +#elif defined DEPTH_6 +#define MIN_VAL (-DBL_MAX) +#define MAX_VAL DBL_MAX +#endif /**************************************Array minMax**************************************/ @@ -78,14 +83,14 @@ __kernel void arithm_op_minMax(__global const T * src, __global T * dst, maxval = max(maxval, temp); } - if(lid > 127) + if (lid > 127) { localmem_min[lid - 128] = minval; localmem_max[lid - 128] = maxval; } barrier(CLK_LOCAL_MEM_FENCE); - if(lid < 128) + if (lid < 128) { localmem_min[lid] = min(minval, localmem_min[lid]); localmem_max[lid] = max(maxval, localmem_max[lid]); @@ -138,14 +143,14 @@ __kernel void arithm_op_minMax_mask(__global const T * src, __global T * dst, } } - if(lid > 127) + if (lid > 127) { localmem_min[lid - 128] = minval; localmem_max[lid - 128] = maxval; } barrier(CLK_LOCAL_MEM_FENCE); - if(lid < 128) + if (lid < 128) { localmem_min[lid] = min(minval, localmem_min[lid]); localmem_max[lid] = max(maxval, localmem_max[lid]); diff --git a/modules/ocl/src/opencl/arithm_minMax_mask.cl b/modules/ocl/src/opencl/arithm_minMax_mask.cl deleted file mode 100644 index 3836e3cf1..000000000 --- a/modules/ocl/src/opencl/arithm_minMax_mask.cl +++ /dev/null @@ -1,196 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. -// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// @Authors -// Shengen Yan,yanshengen@gmail.com -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors as is and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -/**************************************PUBLICFUNC*************************************/ -#if defined (DOUBLE_SUPPORT) -#pragma OPENCL EXTENSION cl_khr_fp64:enable -#endif - -#if defined (DEPTH_0) -#define VEC_TYPE uchar8 -#define TYPE uchar -#define CONVERT_TYPE convert_uchar8 -#define MIN_VAL 0 -#define MAX_VAL 255 -#endif -#if defined (DEPTH_1) -#define VEC_TYPE char8 -#define TYPE char -#define CONVERT_TYPE convert_char8 -#define MIN_VAL -128 -#define MAX_VAL 127 -#endif -#if defined (DEPTH_2) -#define VEC_TYPE ushort8 -#define TYPE ushort -#define CONVERT_TYPE convert_ushort8 -#define MIN_VAL 0 -#define MAX_VAL 65535 -#endif -#if defined (DEPTH_3) -#define VEC_TYPE short8 -#define TYPE short -#define CONVERT_TYPE convert_short8 -#define MIN_VAL -32768 -#define MAX_VAL 32767 -#endif -#if defined (DEPTH_4) -#define VEC_TYPE int8 -#define TYPE int -#define CONVERT_TYPE convert_int8 -#define MIN_VAL INT_MIN -#define MAX_VAL INT_MAX -#endif -#if defined (DEPTH_5) -#define VEC_TYPE float8 -#define TYPE float -#define CONVERT_TYPE convert_float8 -#define MIN_VAL (-FLT_MAX) -#define MAX_VAL FLT_MAX -#endif -#if defined (DEPTH_6) -#define VEC_TYPE double8 -#define TYPE double -#define CONVERT_TYPE convert_double8 -#define MIN_VAL (-DBL_MAX) -#define MAX_VAL DBL_MAX -#endif - -#if defined (REPEAT_E0) -#define repeat_me(a) a = a; -#endif -#if defined (REPEAT_E1) -#define repeat_me(a) a.s7 = 0; -#endif -#if defined (REPEAT_E2) -#define repeat_me(a) a.s7 = 0;a.s6 = 0; -#endif -#if defined (REPEAT_E3) -#define repeat_me(a) a.s7 = 0;a.s6 = 0;a.s5 = 0; -#endif -#if defined (REPEAT_E4) -#define repeat_me(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0; -#endif -#if defined (REPEAT_E5) -#define repeat_me(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0; -#endif -#if defined (REPEAT_E6) -#define repeat_me(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0; -#endif -#if defined (REPEAT_E7) -#define repeat_me(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0;a.s1 = 0; -#endif - -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable - -/**************************************Array minMax mask**************************************/ -__kernel void arithm_op_minMax_mask (int cols,int invalid_cols,int offset,int elemnum,int groupnum, __global TYPE *src, - int minvalid_cols,int moffset, __global uchar *mask,__global VEC_TYPE *dst) -{ - unsigned int lid = get_local_id(0); - unsigned int gid = get_group_id(0); - unsigned int id = get_global_id(0); - unsigned int idx = id + (id / cols) * invalid_cols; - unsigned int midx = id + (id / cols) * minvalid_cols; - __local VEC_TYPE localmem_max[128],localmem_min[128]; - VEC_TYPE minval,maxval,temp,m_temp; - if(id < elemnum) - { - temp = vload8(idx, &src[offset]); - m_temp = CONVERT_TYPE(vload8(midx,&mask[moffset])); - if(id % cols == cols - 1) - { - repeat_me(m_temp); - } - minval = m_temp != (VEC_TYPE)0 ? temp : (VEC_TYPE)MAX_VAL; - maxval = m_temp != (VEC_TYPE)0 ? temp : (VEC_TYPE)MIN_VAL; - } - else - { - minval = MAX_VAL; - maxval = MIN_VAL; - } - for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8)) - { - idx = id + (id / cols) * invalid_cols; - midx = id + (id / cols) * minvalid_cols; - temp = vload8(idx, &src[offset]); - m_temp = CONVERT_TYPE(vload8(midx,&mask[moffset])); - if(id % cols == cols - 1) - { - repeat_me(m_temp); - } - minval = min(minval,m_temp != (VEC_TYPE)0 ? temp : minval); - maxval = max(maxval,m_temp != (VEC_TYPE)0 ? temp : maxval); - } - if(lid > 127) - { - localmem_min[lid - 128] = minval; - localmem_max[lid - 128] = maxval; - } - barrier(CLK_LOCAL_MEM_FENCE); - if(lid < 128) - { - localmem_min[lid] = min(minval,localmem_min[lid]); - localmem_max[lid] = max(maxval,localmem_max[lid]); - } - barrier(CLK_LOCAL_MEM_FENCE); - for(int lsize = 64; lsize > 0; lsize >>= 1) - { - if(lid < lsize) - { - int lid2 = lsize + lid; - localmem_min[lid] = min(localmem_min[lid] , localmem_min[lid2]); - localmem_max[lid] = max(localmem_max[lid] , localmem_max[lid2]); - } - barrier(CLK_LOCAL_MEM_FENCE); - } - if( lid == 0) - { - dst[gid] = localmem_min[0]; - dst[gid + groupnum] = localmem_max[0]; - } -} diff --git a/modules/ocl/test/test_arithm.cpp b/modules/ocl/test/test_arithm.cpp index 1d1b0f1ab..11b945c5b 100644 --- a/modules/ocl/test/test_arithm.cpp +++ b/modules/ocl/test/test_arithm.cpp @@ -126,8 +126,12 @@ PARAM_TEST_CASE(Lut, MatDepth, MatDepth, bool, bool) void Near(double threshold = 0.) { - EXPECT_MAT_NEAR(dst, Mat(gdst_whole), threshold); - EXPECT_MAT_NEAR(dst_roi, Mat(gdst_roi), threshold); + Mat whole, roi; + gdst_whole.download(whole); + gdst_roi.download(roi); + + EXPECT_MAT_NEAR(dst, whole, threshold); + EXPECT_MAT_NEAR(dst_roi, roi, threshold); } }; @@ -222,14 +226,22 @@ PARAM_TEST_CASE(ArithmTestBase, MatDepth, Channels, bool) void Near(double threshold = 0.) { - EXPECT_MAT_NEAR(dst1, Mat(gdst1_whole), threshold); - EXPECT_MAT_NEAR(dst1_roi, Mat(gdst1_roi), threshold); + Mat whole, roi; + gdst1_whole.download(whole); + gdst1_roi.download(roi); + + EXPECT_MAT_NEAR(dst1, whole, threshold); + EXPECT_MAT_NEAR(dst1_roi, roi, threshold); } void Near1(double threshold = 0.) { - EXPECT_MAT_NEAR(dst2, Mat(gdst2_whole), threshold); - EXPECT_MAT_NEAR(dst2_roi, Mat(gdst2_roi), threshold); + Mat whole, roi; + gdst2_whole.download(whole); + gdst2_roi.download(roi); + + EXPECT_MAT_NEAR(dst2, whole, threshold); + EXPECT_MAT_NEAR(dst2_roi, roi, threshold); } }; @@ -724,6 +736,15 @@ OCL_TEST_P(MinMax, MAT) OCL_TEST_P(MinMax, MASK) { + enum { MAX_IDX = 0, MIN_IDX }; + static const double minMaxGolds[2][7] = + { + { std::numeric_limits::min(), std::numeric_limits::min(), std::numeric_limits::min(), + std::numeric_limits::min(), std::numeric_limits::min(), -std::numeric_limits::max(), -std::numeric_limits::max() }, + { std::numeric_limits::max(), std::numeric_limits::max(), std::numeric_limits::max(), + std::numeric_limits::max(), std::numeric_limits::max(), std::numeric_limits::max(), std::numeric_limits::max() }, + }; + for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); @@ -750,8 +771,16 @@ OCL_TEST_P(MinMax, MASK) double minVal_, maxVal_; cv::ocl::minMax(gsrc1_roi, &minVal_, &maxVal_, gmask_roi); - EXPECT_DOUBLE_EQ(minVal, minVal_); - EXPECT_DOUBLE_EQ(maxVal, maxVal_); + if (cv::countNonZero(mask_roi) == 0) + { + EXPECT_DOUBLE_EQ(minMaxGolds[MIN_IDX][depth], minVal_); + EXPECT_DOUBLE_EQ(minMaxGolds[MAX_IDX][depth], maxVal_); + } + else + { + EXPECT_DOUBLE_EQ(minVal, minVal_); + EXPECT_DOUBLE_EQ(maxVal, maxVal_); + } } }