diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index e81ee56a5..2a663b990 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -64,6 +64,7 @@ namespace cv { //////////////////////////////// OpenCL kernel strings ///////////////////// + extern const char *arithm_absdiff_nonsaturate; extern const char *arithm_nonzero; extern const char *arithm_sum; extern const char *arithm_minMax; @@ -435,14 +436,12 @@ Scalar cv::ocl::sqrSum(const oclMat &src) static sumFunc functab[3] = { arithmetic_sum, - arithmetic_sum, + arithmetic_sum, arithmetic_sum }; bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE); - int ddepth = std::max(src.depth(), CV_32S); - if (!hasDouble && ddepth == CV_64F) - ddepth = CV_32F; + int ddepth = src.depth() <= CV_32S ? CV_32S : (hasDouble ? CV_64F : CV_32F); sumFunc func = functab[ddepth - CV_32S]; return func(src, SQR_SUM, ddepth); @@ -595,57 +594,102 @@ void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, cons double cv::ocl::norm(const oclMat &src1, int normType) { - return norm(src1, oclMat(src1.size(), src1.type(), Scalar::all(0)), normType); + CV_Assert((normType & NORM_RELATIVE) == 0); + return norm(src1, oclMat(), normType); +} + +static void arithm_absdiff_nonsaturate_run(const oclMat & src1, const oclMat & src2, oclMat & diff) +{ + CV_Assert(src1.step % src1.elemSize() == 0 && (src2.empty() || src2.step % src2.elemSize() == 0)); + Context *clCxt = src1.clCxt; + + int ddepth = CV_64F; + diff.create(src1.size(), CV_MAKE_TYPE(ddepth, src1.channels())); + + int oclChannels = src1.oclchannels(), sdepth = src1.depth(); + int src1step1 = src1.step / src1.elemSize(), src1offset1 = src1.offset / src1.elemSize(); + int src2step1 = src2.step / src2.elemSize(), src2offset1 = src2.offset / src2.elemSize(); + int diffstep1 = diff.step / diff.elemSize(), diffoffset1 = diff.offset / diff.elemSize(); + + string kernelName = "arithm_absdiff_nonsaturate"; + size_t localThreads[3] = { 16, 16, 1 }; + size_t globalThreads[3] = { diff.cols, diff.rows, 1 }; + + const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; + const char * const channelMap[] = { "", "", "2", "4", "4" }; + + std::string buildOptions = format("-D srcT=%s%s -D dstT=%s%s -D convertToDstT=convert_%s%s", + typeMap[sdepth], channelMap[oclChannels], + typeMap[ddepth], channelMap[oclChannels], + typeMap[ddepth], channelMap[oclChannels]); + + vector > args; + args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1step1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1offset1 )); + + if (!src2.empty()) + { + args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src2step1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src2offset1 )); + + kernelName += "_binary"; + } + + args.push_back( make_pair( sizeof(cl_mem), (void *)&diff.data )); + args.push_back( make_pair( sizeof(cl_int), (void *)&diffstep1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&diffoffset1 )); + + args.push_back( make_pair( sizeof(cl_int), (void *)&src1.cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows )); + + openCLExecuteKernel(clCxt, &arithm_absdiff_nonsaturate, + kernelName, globalThreads, localThreads, + args, -1, -1, buildOptions.c_str()); } double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType) { + CV_Assert(!src1.empty()); + CV_Assert(src2.empty() || (src1.type() == src2.type() && src1.size() == src2.size())); + + if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.depth() == CV_64F) + { + CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); + } + bool isRelative = (normType & NORM_RELATIVE) != 0; - normType &= 7; - CV_Assert(src1.depth() <= CV_32S && src1.type() == src2.type() && ( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2)); - int channels = src1.oclchannels(), i = 0, *p; + normType &= NORM_TYPE_MASK; + CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2); + + Scalar s; + int cn = src1.channels(); double r = 0; - oclMat gm1(src1.size(), src1.type()); - int min_int = (normType == NORM_INF ? CL_INT_MIN : 0); - Mat m(1, 1, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(min_int)); - oclMat gm2(m), emptyMat; - switch(normType) + oclMat diff; + arithm_absdiff_nonsaturate_run(src1, src2, diff); + + switch (normType) { case NORM_INF: - // arithmetic_run(src1, src2, gm1, "arithm_op_absdiff"); - //arithmetic_minMax_run(gm1,emptyMat, gm2,"arithm_op_max"); - m = (gm2); - p = (int *)m.data; - r = -std::numeric_limits::max(); - for (i = 0; i < channels; i++) - { - r = std::max(r, (double)p[i]); - } + diff = diff.reshape(1); + minMax(diff, NULL, &r); break; case NORM_L1: - //arithmetic_run(src1, src2, gm1, "arithm_op_absdiff"); - //arithmetic_sum_run(gm1, gm2,"arithm_op_sum"); - m = (gm2); - p = (int *)m.data; - for (i = 0; i < channels; i++) - { - r = r + (double)p[i]; - } + s = sum(diff); + for (int i = 0; i < cn; ++i) + r += s[i]; break; case NORM_L2: - //arithmetic_run(src1, src2, gm1, "arithm_op_absdiff"); - //arithmetic_sum_run(gm1, gm2,"arithm_op_squares_sum"); - m = (gm2); - p = (int *)m.data; - for (i = 0; i < channels; i++) - { - r = r + (double)p[i]; - } + s = sqrSum(diff); + for (int i = 0; i < cn; ++i) + r += s[i]; r = std::sqrt(r); break; } if (isRelative) r = r / norm(src2, normType); + return r; } diff --git a/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl b/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl new file mode 100644 index 000000000..e5d827139 --- /dev/null +++ b/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl @@ -0,0 +1,93 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jia Haipeng, jiahaipeng95@gmail.com +// +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif +#endif + +__kernel void arithm_absdiff_nonsaturate_binary(__global srcT *src1, int src1_step, int src1_offset, + __global srcT *src2, int src2_step, int src2_offset, + __global dstT *dst, int dst_step, int dst_offset, + int cols, int rows) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if (x < cols && y < rows) + { + int src1_index = mad24(y, src1_step, x + src1_offset); + int src2_index = mad24(y, src2_step, x + src2_offset); + int dst_index = mad24(y, dst_step, x + dst_offset); + + dstT t0 = convertToDstT(src1[src1_index]); + dstT t1 = convertToDstT(src2[src2_index]); + dstT t2 = t0 - t1; + + dst[dst_index] = t2 >= 0 ? t2 : -t2; + } +} + +__kernel void arithm_absdiff_nonsaturate(__global srcT *src1, int src1_step, int src1_offset, + __global dstT *dst, int dst_step, int dst_offset, + int cols, int rows) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if (x < cols && y < rows) + { + int src1_index = mad24(y, src1_step, x + src1_offset); + int dst_index = mad24(y, dst_step, x + dst_offset); + + dstT t0 = convertToDstT(src1[src1_index]); + + dst[dst_index] = t0 >= 0 ? t0 : -t0; + } +} diff --git a/modules/ocl/test/test_arithm.cpp b/modules/ocl/test/test_arithm.cpp index ac4842e23..db01d9503 100644 --- a/modules/ocl/test/test_arithm.cpp +++ b/modules/ocl/test/test_arithm.cpp @@ -220,8 +220,8 @@ PARAM_TEST_CASE(ArithmTestBase, int, int, bool) cv::RNG &rng = TS::ptr()->get_rng(); - src1 = randomMat(rng, randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false); - src2 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, -15440, 14450, false); + src1 = randomMat(rng, randomSize(MIN_VALUE, MAX_VALUE), type, 2, 11, false); + src2 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, -1540, 1740, false); dst1 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false); dst2 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false); mask = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), CV_8UC1, 0, 2, false); @@ -1440,7 +1440,7 @@ TEST_P(SetIdentity, Mat) } } -//////////////////////////////// setIdentity ///////////////////////////////////////////////// +//////////////////////////////// meanStdDev ///////////////////////////////////////////////// typedef ArithmTestBase MeanStdDev; @@ -1458,12 +1458,70 @@ TEST_P(MeanStdDev, Mat) for (int i = 0; i < 4; ++i) { - EXPECT_NEAR(cpu_mean[i], gpu_mean[i], 1e-5); + EXPECT_NEAR(cpu_mean[i], gpu_mean[i], 0.1); EXPECT_NEAR(cpu_stddev[i], gpu_stddev[i], 0.1); } } } +//////////////////////////////// Norm ///////////////////////////////////////////////// + +typedef ArithmTestBase Norm; + +TEST_P(Norm, NORM_INF) +{ + for (int relative = 0; relative < 2; ++relative) + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + int type = NORM_INF; + if (relative == 1) + type |= NORM_RELATIVE; + + const double cpuRes = cv::norm(src1_roi, src2_roi, type); + const double gpuRes = cv::ocl::norm(gsrc1, gsrc2, type); + + EXPECT_NEAR(cpuRes, gpuRes, 0.1); + } +} + +TEST_P(Norm, NORM_L1) +{ + for (int relative = 0; relative < 2; ++relative) + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + int type = NORM_L1; + if (relative == 1) + type |= NORM_RELATIVE; + + const double cpuRes = cv::norm(src1_roi, src2_roi, type); + const double gpuRes = cv::ocl::norm(gsrc1, gsrc2, type); + + EXPECT_NEAR(cpuRes, gpuRes, 0.1); + } +} + +TEST_P(Norm, NORM_L2) +{ + for (int relative = 0; relative < 2; ++relative) + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + int type = NORM_L2; + if (relative == 1) + type |= NORM_RELATIVE; + + const double cpuRes = cv::norm(src1_roi, src2_roi, type); + const double gpuRes = cv::ocl::norm(gsrc1, gsrc2, type); + + EXPECT_NEAR(cpuRes, gpuRes, 0.1); + } +} + //////////////////////////////////////// Instantiation ///////////////////////////////////////// INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool(), Bool())); @@ -1495,5 +1553,6 @@ INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(Values(CV_32F, CV_64F), testing::Ra INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, SetIdentity, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, MeanStdDev, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Norm, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); #endif // HAVE_OPENCL