From 0db69fbe193b5f5925deebaff90d94f4ef7ba3d5 Mon Sep 17 00:00:00 2001 From: yao Date: Mon, 6 Aug 2012 15:00:27 +0800 Subject: [PATCH 01/35] Add blend, columnsum, pyrUp/down, matchTemplate for ocl module --- modules/ocl/include/opencv2/ocl/ocl.hpp | 39 +- modules/ocl/src/blend.cpp | 98 +++ modules/ocl/src/columnsum.cpp | 91 ++ modules/ocl/src/kernels/blend_linear.cl | 196 +++++ modules/ocl/src/kernels/imgproc_columnsum.cl | 80 ++ modules/ocl/src/kernels/match_template.cl | 824 +++++++++++++++++++ modules/ocl/src/kernels/pyr_down.cl | 500 +++++++++++ modules/ocl/src/kernels/pyr_up.cl | 750 +++++++++++++++++ modules/ocl/src/match_template.cpp | 560 +++++++++++++ modules/ocl/src/pyrdown.cpp | 115 +++ modules/ocl/src/pyrup.cpp | 88 ++ modules/ocl/test/test_blend.cpp | 82 ++ modules/ocl/test/test_columnsum.cpp | 105 +++ modules/ocl/test/test_match_template.cpp | 166 ++++ modules/ocl/test/test_pyrdown.cpp | 295 +++++++ modules/ocl/test/test_pyrup.cpp | 97 +++ 16 files changed, 4085 insertions(+), 1 deletion(-) create mode 100644 modules/ocl/src/blend.cpp create mode 100644 modules/ocl/src/columnsum.cpp create mode 100644 modules/ocl/src/kernels/blend_linear.cl create mode 100644 modules/ocl/src/kernels/imgproc_columnsum.cl create mode 100644 modules/ocl/src/kernels/match_template.cl create mode 100644 modules/ocl/src/kernels/pyr_down.cl create mode 100644 modules/ocl/src/kernels/pyr_up.cl create mode 100644 modules/ocl/src/match_template.cpp create mode 100644 modules/ocl/src/pyrdown.cpp create mode 100644 modules/ocl/src/pyrup.cpp create mode 100644 modules/ocl/test/test_blend.cpp create mode 100644 modules/ocl/test/test_columnsum.cpp create mode 100644 modules/ocl/test/test_match_template.cpp create mode 100644 modules/ocl/test/test_pyrdown.cpp create mode 100644 modules/ocl/test/test_pyrup.cpp diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index 0efc72283..517e9ffce 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -858,7 +858,44 @@ namespace cv void benchmark_copy_vectorize(const oclMat &src, oclMat &dst); void benchmark_copy_offset_stride(const oclMat &src, oclMat &dst); void benchmark_ILP(); - + + //! computes vertical sum, supports only CV_32FC1 images + CV_EXPORTS void columnSum(const oclMat& src, oclMat& sum); + + //! performs linear blending of two images + //! to avoid accuracy errors sum of weigths shouldn't be very close to zero + // supports only CV_8UC1 source type + CV_EXPORTS void blendLinear(const oclMat& img1, const oclMat& img2, const oclMat& weights1, const oclMat& weights2, oclMat& result); + + /////////////////////////////// Pyramid ///////////////////////////////////// + CV_EXPORTS void pyrDown(const oclMat& src, oclMat& dst); + + //! upsamples the source image and then smoothes it + CV_EXPORTS void pyrUp(const cv::ocl::oclMat& src,cv::ocl::oclMat& dst); + + ///////////////////////////////////////// match_template ///////////////////////////////////////////////////////////// + struct CV_EXPORTS MatchTemplateBuf + { + Size user_block_size; + oclMat imagef, templf; + std::vector images; + std::vector image_sums; + std::vector image_sqsums; + }; + + + //! computes the proximity map for the raster template and the image where the template is searched for + // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4 + // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4 + CV_EXPORTS void matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method); + + //! computes the proximity map for the raster template and the image where the template is searched for + // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4 + // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4 + CV_EXPORTS void matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method, MatchTemplateBuf& buf); + + + } } #include "opencv2/ocl/matrix_operations.hpp" diff --git a/modules/ocl/src/blend.cpp b/modules/ocl/src/blend.cpp new file mode 100644 index 000000000..a9df907d3 --- /dev/null +++ b/modules/ocl/src/blend.cpp @@ -0,0 +1,98 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Nathan, liujun@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include + +using namespace cv; +using namespace cv::ocl; +using namespace std; + +#if !defined (HAVE_OPENCL) +void cv::ocl::blendLinear(const oclMat& img1, const oclMat& img2, const oclMat& weights1, const oclMat& weights2, + oclMat& result){throw_nogpu();} +#else +namespace cv +{ + namespace ocl + { + ////////////////////////////////////OpenCL kernel strings////////////////////////// + extern const char *blend_linear; + } +} + +void cv::ocl::blendLinear(const oclMat& img1, const oclMat& img2, const oclMat& weights1, const oclMat& weights2, + oclMat& result) +{ + cv::ocl::Context *ctx = img1.clCxt; + assert(ctx == img2.clCxt && ctx == weights1.clCxt && ctx == weights2.clCxt); + int channels = img1.channels(); + int depth = img1.depth(); + int rows = img1.rows; + int cols = img1.cols; + int istep = img1.step; + int wstep = weights1.step; + size_t globalSize[] = {cols * channels, rows, 1}; + size_t localSize[] = {16, 16, 1}; + + vector< pair > args; + + if(globalSize[0]!=0) + { + args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&img1.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&img2.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data )); + args.push_back( make_pair( sizeof(cl_int), (void *)&rows )); + args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&istep )); + args.push_back( make_pair( sizeof(cl_int), (void *)&wstep )); + std::string kernelName = "BlendLinear"; + + openCLExecuteKernel(ctx, &blend_linear, kernelName, globalSize, localSize, args, channels, depth); + } +} +#endif \ No newline at end of file diff --git a/modules/ocl/src/columnsum.cpp b/modules/ocl/src/columnsum.cpp new file mode 100644 index 000000000..e789d38b0 --- /dev/null +++ b/modules/ocl/src/columnsum.cpp @@ -0,0 +1,91 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Chunpeng Zhang, chunpeng@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include +#include "precomp.hpp" + +using namespace cv; +using namespace cv::ocl; +using namespace std; + + +#if !defined(HAVE_OPENCL) + +void cv::ocl::columnSum(const oclMat& src,oclMat& dst){ throw_nogpu(); } + +#else /*!HAVE_OPENCL */ + +namespace cv +{ + namespace ocl + { + extern const char* imgproc_columnsum; + } +} + +void cv::ocl::columnSum(const oclMat& src,oclMat& dst) +{ + CV_Assert(src.type() == CV_32FC1 && dst.type() == CV_32FC1 && src.size() == dst.size()); + + Context *clCxt = src.clCxt; + + const std::string kernelName = "columnSum"; + + std::vector< pair > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step)); + + size_t globalThreads[3] = {dst.cols, dst.rows, 1}; + size_t localThreads[3] = {16, 16, 1}; + + openCLExecuteKernel(clCxt, &imgproc_columnsum, kernelName, globalThreads, localThreads, args, src.channels(), src.depth()); + +} +#endif \ No newline at end of file diff --git a/modules/ocl/src/kernels/blend_linear.cl b/modules/ocl/src/kernels/blend_linear.cl new file mode 100644 index 000000000..bf733576c --- /dev/null +++ b/modules/ocl/src/kernels/blend_linear.cl @@ -0,0 +1,196 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, MulticoreWare Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Liu Liujun, liujun@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other GpuMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ +__kernel void BlendLinear_C1_D0( + __global uchar *dst, + __global uchar *img1, + __global uchar *img2, + __global float *weight1, + __global float *weight2, + int rows, + int cols, + int istep, + int wstep + ) +{ + int idx = get_global_id(0); + int idy = get_global_id(1); + if (idx < cols && idy < rows) + { + int pos = idy * istep + idx; + int wpos = idy * (wstep /sizeof(float)) + idx; + float w1 = weight1[wpos]; + float w2 = weight2[wpos]; + dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f); + + } +} + +__kernel void BlendLinear_C3_D0( + __global uchar *dst, + __global uchar *img1, + __global uchar *img2, + __global float *weight1, + __global float *weight2, + int rows, + int cols, + int istep, + int wstep + ) +{ + int idx = get_global_id(0); + int idy = get_global_id(1); + int x = idx / 3; + int y = idy; + if (x < cols && y < rows) + { + int pos = idy * istep + idx; + int wpos = idy * (wstep /sizeof(float)) + x; + float w1 = weight1[wpos]; + float w2 = weight2[wpos]; + dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f); + } +} + +__kernel void BlendLinear_C4_D0( + __global uchar *dst, + __global uchar *img1, + __global uchar *img2, + __global float *weight1, + __global float *weight2, + int rows, + int cols, + int istep, + int wstep + ) +{ + int idx = get_global_id(0); + int idy = get_global_id(1); + int x = idx / 4; + int y = idy; + if (x < cols && y < rows) + { + int pos = idy * istep + idx; + int wpos = idy * (wstep /sizeof(float)) + x; + float w1 = weight1[wpos]; + float w2 = weight2[wpos]; + dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f); + } +} + +__kernel void BlendLinear_C1_D5( + __global float *dst, + __global float *img1, + __global float *img2, + __global float *weight1, + __global float *weight2, + int rows, + int cols, + int istep, + int wstep + ) +{ + int idx = get_global_id(0); + int idy = get_global_id(1); + if (idx < cols && idy < rows) + { + int pos = idy * (istep / sizeof(float)) + idx; + int wpos = idy * (wstep /sizeof(float)) + idx; + float w1 = weight1[wpos]; + float w2 = weight2[wpos]; + dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f); + } +} + +__kernel void BlendLinear_C3_D5( + __global float *dst, + __global float *img1, + __global float *img2, + __global float *weight1, + __global float *weight2, + int rows, + int cols, + int istep, + int wstep + ) +{ + int idx = get_global_id(0); + int idy = get_global_id(1); + int x = idx / 3; + int y = idy; + if (x < cols && y < rows) + { + int pos = idy * (istep / sizeof(float)) + idx; + int wpos = idy * (wstep /sizeof(float)) + x; + float w1 = weight1[wpos]; + float w2 = weight2[wpos]; + dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f); + } +} + +__kernel void BlendLinear_C4_D5( + __global float *dst, + __global float *img1, + __global float *img2, + __global float *weight1, + __global float *weight2, + int rows, + int cols, + int istep, + int wstep + ) +{ + int idx = get_global_id(0); + int idy = get_global_id(1); + int x = idx / 4; + int y = idy; + if (x < cols && y < rows) + { + int pos = idy * (istep / sizeof(float)) + idx; + int wpos = idy * (wstep /sizeof(float)) + x; + float w1 = weight1[wpos]; + float w2 = weight2[wpos]; + dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f); + } +} diff --git a/modules/ocl/src/kernels/imgproc_columnsum.cl b/modules/ocl/src/kernels/imgproc_columnsum.cl new file mode 100644 index 000000000..913b417d1 --- /dev/null +++ b/modules/ocl/src/kernels/imgproc_columnsum.cl @@ -0,0 +1,80 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Chunpeng Zhang chunpeng@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#pragma OPENCL EXTENSION cl_amd_printf : enable +#if defined (__ATI__) +#pragma OPENCL EXTENSION cl_amd_fp64:enable + +#elif defined (__NVIDIA__) +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#endif + +//////////////////////////////////////////////////////////////////// +///////////////////////// columnSum //////////////////////////////// +//////////////////////////////////////////////////////////////////// +/// CV_32FC1 +__kernel void columnSum_C1_D5(__global float* src,__global float* dst,int srcCols,int srcRows,int srcStep,int dstStep) +{ + const int x = get_global_id(0); + + srcStep >>= 2; + dstStep >>= 2; + + if (x < srcCols) + { + int srcIdx = x ; + int dstIdx = x ; + + float sum = 0; + + for (int y = 0; y < srcRows; ++y) + { + sum += src[srcIdx]; + dst[dstIdx] = sum; + srcIdx += srcStep; + dstIdx += dstStep; + } + } +} diff --git a/modules/ocl/src/kernels/match_template.cl b/modules/ocl/src/kernels/match_template.cl new file mode 100644 index 000000000..4c5a4fc9c --- /dev/null +++ b/modules/ocl/src/kernels/match_template.cl @@ -0,0 +1,824 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#pragma OPENCL EXTENSION cl_amd_printf : enable + +#if defined (__ATI__) +#pragma OPENCL EXTENSION cl_amd_fp64:enable + +#elif defined (__NVIDIA__) +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#endif + +#if !defined(USE_SQR_INTEGRAL) && (defined (__ATI__) || defined (__NVIDIA__)) +#define TYPE_IMAGE_SQSUM double +#else +#define TYPE_IMAGE_SQSUM ulong +#endif + +////////////////////////////////////////////////// +// utilities +#define SQSUMS_PTR(ox, oy) mad24(gidy + oy, img_sqsums_step, gidx + img_sqsums_offset + ox) +#define SUMS_PTR(ox, oy) mad24(gidy + oy, img_sums_step, gidx + img_sums_offset + ox) +// normAcc* are accurate normalization routines which make GPU matchTemplate +// consistent with CPU one +float normAcc(float num, float denum) +{ + if(fabs(num) < denum) + { + return num / denum; + } + if(fabs(num) < denum * 1.125f) + { + return num > 0 ? 1 : -1; + } + return 0; +} + +float normAcc_SQDIFF(float num, float denum) +{ + if(fabs(num) < denum) + { + return num / denum; + } + if(fabs(num) < denum * 1.125f) + { + return num > 0 ? 1 : -1; + } + return 1; +} +////////////////////////////////////////////////////////////////////// +// normalize + +__kernel +void normalizeKernel_C1_D0 +( + __global const TYPE_IMAGE_SQSUM * img_sqsums, + __global float * res, + ulong tpl_sqsum, + int res_rows, + int res_cols, + int tpl_rows, + int tpl_cols, + int img_sqsums_offset, + int img_sqsums_step, + int res_offset, + int res_step +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + img_sqsums_step /= sizeof(*img_sqsums); + img_sqsums_offset /= sizeof(*img_sqsums); + int res_idx = mad24(gidy, res_step, res_offset + gidx); + if(gidx < res_cols && gidy < res_rows) + { + float image_sqsum_ = (float)( + (img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) - + (img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)])); + res[res_idx] = normAcc(res[res_idx], sqrt(image_sqsum_ * tpl_sqsum)); + } +} + +__kernel +void matchTemplate_Prepared_SQDIFF_C1_D0 +( + __global const TYPE_IMAGE_SQSUM * img_sqsums, + __global float * res, + ulong tpl_sqsum, + int res_rows, + int res_cols, + int tpl_rows, + int tpl_cols, + int img_sqsums_offset, + int img_sqsums_step, + int res_offset, + int res_step +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + img_sqsums_step /= sizeof(*img_sqsums); + img_sqsums_offset /= sizeof(*img_sqsums); + int res_idx = mad24(gidy, res_step, res_offset + gidx); + if(gidx < res_cols && gidy < res_rows) + { + float image_sqsum_ = (float)( + (img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) - + (img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)])); + res[res_idx] = image_sqsum_ - 2.f * res[res_idx] + tpl_sqsum; + } +} + +__kernel +void matchTemplate_Prepared_SQDIFF_NORMED_C1_D0 +( + __global const TYPE_IMAGE_SQSUM * img_sqsums, + __global float * res, + ulong tpl_sqsum, + int res_rows, + int res_cols, + int tpl_rows, + int tpl_cols, + int img_sqsums_offset, + int img_sqsums_step, + int res_offset, + int res_step +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + img_sqsums_step /= sizeof(*img_sqsums); + img_sqsums_offset /= sizeof(*img_sqsums); + int res_idx = mad24(gidy, res_step, res_offset + gidx); + if(gidx < res_cols && gidy < res_rows) + { + float image_sqsum_ = (float)( + (img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) - + (img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)])); + res[res_idx] = normAcc_SQDIFF(image_sqsum_ - 2.f * res[res_idx] + tpl_sqsum, + sqrt(image_sqsum_ * tpl_sqsum)); + } +} + +////////////////////////////////////////////////// +// SQDIFF +__kernel +void matchTemplate_Naive_SQDIFF_C1_D0 +( + __global const uchar * img, + __global const uchar * tpl, + __global float * res, + int img_rows, + int img_cols, + int tpl_rows, + int tpl_cols, + int res_rows, + int res_cols, + int img_offset, + int tpl_offset, + int res_offset, + int img_step, + int tpl_step, + int res_step +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + int i,j; + int delta; + int sum = 0; + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + int res_idx = mad24(gidy, res_step, res_offset + gidx); + + if(gidx < res_cols && gidy < res_rows) + { + for(i = 0; i < tpl_rows; i ++) + { + // get specific rows of img data + __global const uchar * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset); + __global const uchar * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset); + for(j = 0; j < tpl_cols; j ++) + { + delta = img_ptr[j] - tpl_ptr[j]; + sum = mad24(delta, delta, sum); + } + } + res[res_idx] = sum; + } +} + +__kernel +void matchTemplate_Naive_SQDIFF_C1_D5 +( + __global const float * img, + __global const float * tpl, + __global float * res, + int img_rows, + int img_cols, + int tpl_rows, + int tpl_cols, + int res_rows, + int res_cols, + int img_offset, + int tpl_offset, + int res_offset, + int img_step, + int tpl_step, + int res_step +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + int i,j; + float delta; + float sum = 0; + img_step /= sizeof(*img); + img_offset /= sizeof(*img); + tpl_step /= sizeof(*tpl); + tpl_offset /= sizeof(*tpl); + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + + int res_idx = mad24(gidy, res_step, res_offset + gidx); + + if(gidx < res_cols && gidy < res_rows) + { + for(i = 0; i < tpl_rows; i ++) + { + // get specific rows of img data + __global const float * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset); + __global const float * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset); + for(j = 0; j < tpl_cols; j ++) + { + delta = img_ptr[j] - tpl_ptr[j]; + sum = mad(delta, delta, sum); + } + } + res[res_idx] = sum; + } +} + +__kernel +void matchTemplate_Naive_SQDIFF_C4_D0 +( + __global const uchar4 * img, + __global const uchar4 * tpl, + __global float * res, + int img_rows, + int img_cols, + int tpl_rows, + int tpl_cols, + int res_rows, + int res_cols, + int img_offset, + int tpl_offset, + int res_offset, + int img_step, + int tpl_step, + int res_step +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + int i,j; + int4 delta; + int4 sum = (int4)(0, 0, 0, 0); + img_step /= sizeof(*img); + img_offset /= sizeof(*img); + tpl_step /= sizeof(*tpl); + tpl_offset /= sizeof(*tpl); + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + + int res_idx = mad24(gidy, res_step, res_offset + gidx); + + if(gidx < res_cols && gidy < res_rows) + { + for(i = 0; i < tpl_rows; i ++) + { + // get specific rows of img data + __global const uchar4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset); + __global const uchar4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset); + for(j = 0; j < tpl_cols; j ++) + { + //delta = convert_int4(img_ptr[j] - tpl_ptr[j]); // this alternative is incorrect + delta.x = img_ptr[j].x - tpl_ptr[j].x; + delta.y = img_ptr[j].y - tpl_ptr[j].y; + delta.z = img_ptr[j].z - tpl_ptr[j].z; + delta.w = img_ptr[j].w - tpl_ptr[j].w; + sum = mad24(delta, delta, sum); + } + } + res[res_idx] = sum.x + sum.y + sum.z + sum.w; + } +} + +__kernel +void matchTemplate_Naive_SQDIFF_C4_D5 +( + __global const float4 * img, + __global const float4 * tpl, + __global float * res, + int img_rows, + int img_cols, + int tpl_rows, + int tpl_cols, + int res_rows, + int res_cols, + int img_offset, + int tpl_offset, + int res_offset, + int img_step, + int tpl_step, + int res_step +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + int i,j; + float4 delta; + float4 sum = (float4)(0, 0, 0, 0); + img_step /= sizeof(*img); + img_offset /= sizeof(*img); + tpl_step /= sizeof(*tpl); + tpl_offset /= sizeof(*tpl); + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + + int res_idx = mad24(gidy, res_step, res_offset + gidx); + + if(gidx < res_cols && gidy < res_rows) + { + for(i = 0; i < tpl_rows; i ++) + { + // get specific rows of img data + __global const float4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset); + __global const float4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset); + for(j = 0; j < tpl_cols; j ++) + { + //delta = convert_int4(img_ptr[j] - tpl_ptr[j]); // this alternative is incorrect + delta.x = img_ptr[j].x - tpl_ptr[j].x; + delta.y = img_ptr[j].y - tpl_ptr[j].y; + delta.z = img_ptr[j].z - tpl_ptr[j].z; + delta.w = img_ptr[j].w - tpl_ptr[j].w; + sum = mad(delta, delta, sum); + } + } + res[res_idx] = sum.x + sum.y + sum.z + sum.w; + } +} + +////////////////////////////////////////////////// +// CCORR +__kernel +void matchTemplate_Naive_CCORR_C1_D0 +( + __global const uchar * img, + __global const uchar * tpl, + __global float * res, + int img_rows, + int img_cols, + int tpl_rows, + int tpl_cols, + int res_rows, + int res_cols, + int img_offset, + int tpl_offset, + int res_offset, + int img_step, + int tpl_step, + int res_step +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + int i,j; + int sum = 0; + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + + int res_idx = mad24(gidy, res_step, res_offset + gidx); + + if(gidx < res_cols && gidy < res_rows) + { + for(i = 0; i < tpl_rows; i ++) + { + // get specific rows of img data + __global const uchar * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset); + __global const uchar * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset); + for(j = 0; j < tpl_cols; j ++) + { + sum = mad24(img_ptr[j], tpl_ptr[j], sum); + } + } + res[res_idx] = sum; + } +} + +__kernel +void matchTemplate_Naive_CCORR_C1_D5 +( + __global const float * img, + __global const float * tpl, + __global float * res, + int img_rows, + int img_cols, + int tpl_rows, + int tpl_cols, + int res_rows, + int res_cols, + int img_offset, + int tpl_offset, + int res_offset, + int img_step, + int tpl_step, + int res_step +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + int i,j; + float sum = 0; + img_step /= sizeof(*img); + img_offset /= sizeof(*img); + tpl_step /= sizeof(*tpl); + tpl_offset /= sizeof(*tpl); + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + + int res_idx = mad24(gidy, res_step, res_offset + gidx); + + if(gidx < res_cols && gidy < res_rows) + { + for(i = 0; i < tpl_rows; i ++) + { + // get specific rows of img data + __global const float * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset); + __global const float * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset); + for(j = 0; j < tpl_cols; j ++) + { + sum = mad(img_ptr[j], tpl_ptr[j], sum); + } + } + res[res_idx] = sum; + } +} + +__kernel +void matchTemplate_Naive_CCORR_C4_D0 +( + __global const uchar4 * img, + __global const uchar4 * tpl, + __global float * res, + int img_rows, + int img_cols, + int tpl_rows, + int tpl_cols, + int res_rows, + int res_cols, + int img_offset, + int tpl_offset, + int res_offset, + int img_step, + int tpl_step, + int res_step +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + int i,j; + int4 sum = (int4)(0, 0, 0, 0); + img_step /= sizeof(*img); + img_offset /= sizeof(*img); + tpl_step /= sizeof(*tpl); + tpl_offset /= sizeof(*tpl); + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + + int res_idx = mad24(gidy, res_step, res_offset + gidx); + + if(gidx < res_cols && gidy < res_rows) + { + for(i = 0; i < tpl_rows; i ++) + { + // get specific rows of img data + __global const uchar4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset); + __global const uchar4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset); + for(j = 0; j < tpl_cols; j ++) + { + sum = mad24(convert_int4(img_ptr[j]), convert_int4(tpl_ptr[j]), sum); + } + } + res[res_idx] = sum.x + sum.y + sum.z + sum.w; + } +} + +__kernel +void matchTemplate_Naive_CCORR_C4_D5 +( + __global const float4 * img, + __global const float4 * tpl, + __global float * res, + int img_rows, + int img_cols, + int tpl_rows, + int tpl_cols, + int res_rows, + int res_cols, + int img_offset, + int tpl_offset, + int res_offset, + int img_step, + int tpl_step, + int res_step +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + int i,j; + float4 sum = (float4)(0, 0, 0, 0); + img_step /= sizeof(*img); + img_offset /= sizeof(*img); + tpl_step /= sizeof(*tpl); + tpl_offset /= sizeof(*tpl); + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + + int res_idx = mad24(gidy, res_step, res_offset + gidx); + + if(gidx < res_cols && gidy < res_rows) + { + for(i = 0; i < tpl_rows; i ++) + { + // get specific rows of img data + __global const float4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset); + __global const float4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset); + for(j = 0; j < tpl_cols; j ++) + { + sum = mad(convert_float4(img_ptr[j]), convert_float4(tpl_ptr[j]), sum); + } + } + res[res_idx] = sum.x + sum.y + sum.z + sum.w; + } +} + +////////////////////////////////////////////////// +// CCOFF +__kernel +void matchTemplate_Prepared_CCOFF_C1_D0 +( + __global float * res, + int img_rows, + int img_cols, + int tpl_rows, + int tpl_cols, + int res_rows, + int res_cols, + int res_offset, + int res_step, + __global const uint * img_sums, + int img_sums_offset, + int img_sums_step, + float tpl_sum +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + + img_sums_offset /= sizeof(*img_sums); + img_sums_step /= sizeof(*img_sums); + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + + int res_idx = mad24(gidy, res_step, res_offset + gidx); + + if(gidx < res_cols && gidy < res_rows) + { + float sum = (float)( + (img_sums[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums[SUMS_PTR(tpl_cols, 0)]) + - (img_sums[SUMS_PTR(0, tpl_rows)] - img_sums[SUMS_PTR(0, 0)])); + res[res_idx] -= sum * tpl_sum; + } +} +__kernel +void matchTemplate_Prepared_CCOFF_C4_D0 +( + __global float * res, + int img_rows, + int img_cols, + int tpl_rows, + int tpl_cols, + int res_rows, + int res_cols, + int res_offset, + int res_step, + __global const uint * img_sums_c0, + __global const uint * img_sums_c1, + __global const uint * img_sums_c2, + __global const uint * img_sums_c3, + int img_sums_offset, + int img_sums_step, + float tpl_sum_c0, + float tpl_sum_c1, + float tpl_sum_c2, + float tpl_sum_c3 +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + + img_sums_offset /= sizeof(*img_sums_c0); + img_sums_step /= sizeof(*img_sums_c0); + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + + int res_idx = mad24(gidy, res_step, res_offset + gidx); + + if(gidx < res_cols && gidy < res_rows) + { + float ccorr = res[res_idx]; + ccorr -= tpl_sum_c0*(float)( + (img_sums_c0[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c0[SUMS_PTR(tpl_cols, 0)]) + - (img_sums_c0[SUMS_PTR(0, tpl_rows)] - img_sums_c0[SUMS_PTR(0, 0)])); + ccorr -= tpl_sum_c1*(float)( + (img_sums_c1[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c1[SUMS_PTR(tpl_cols, 0)]) + - (img_sums_c1[SUMS_PTR(0, tpl_rows)] - img_sums_c1[SUMS_PTR(0, 0)])); + ccorr -= tpl_sum_c2*(float)( + (img_sums_c2[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c2[SUMS_PTR(tpl_cols, 0)]) + - (img_sums_c2[SUMS_PTR(0, tpl_rows)] - img_sums_c2[SUMS_PTR(0, 0)])); + ccorr -= tpl_sum_c3*(float)( + (img_sums_c3[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c3[SUMS_PTR(tpl_cols, 0)]) + - (img_sums_c3[SUMS_PTR(0, tpl_rows)] - img_sums_c3[SUMS_PTR(0, 0)])); + res[res_idx] = ccorr; + } +} + +__kernel +void matchTemplate_Prepared_CCOFF_NORMED_C1_D0 +( + __global float * res, + int img_rows, + int img_cols, + int tpl_rows, + int tpl_cols, + int res_rows, + int res_cols, + int res_offset, + int res_step, + float weight, + __global const uint * img_sums, + int img_sums_offset, + int img_sums_step, + __global const TYPE_IMAGE_SQSUM * img_sqsums, + int img_sqsums_offset, + int img_sqsums_step, + float tpl_sum, + float tpl_sqsum +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + + img_sqsums_step /= sizeof(*img_sqsums); + img_sqsums_offset /= sizeof(*img_sqsums); + img_sums_offset /= sizeof(*img_sums); + img_sums_step /= sizeof(*img_sums); + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + + + int res_idx = mad24(gidy, res_step, res_offset + gidx); + + if(gidx < res_cols && gidy < res_rows) + { + float image_sum_ = (float)( + (img_sums[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums[SUMS_PTR(tpl_cols, 0)]) + - (img_sums[SUMS_PTR(0, tpl_rows)] - img_sums[SUMS_PTR(0, 0)])); + + float image_sqsum_ = (float)( + (img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) - + (img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)])); + res[res_idx] = normAcc(res[res_idx] - image_sum_ * tpl_sum, + sqrt(tpl_sqsum * (image_sqsum_ - weight * image_sum_ * image_sum_))); + } +} +__kernel +void matchTemplate_Prepared_CCOFF_NORMED_C4_D0 +( + __global float * res, + int img_rows, + int img_cols, + int tpl_rows, + int tpl_cols, + int res_rows, + int res_cols, + int res_offset, + int res_step, + float weight, + __global const uint * img_sums_c0, + __global const uint * img_sums_c1, + __global const uint * img_sums_c2, + __global const uint * img_sums_c3, + int img_sums_offset, + int img_sums_step, + __global const TYPE_IMAGE_SQSUM * img_sqsums_c0, + __global const TYPE_IMAGE_SQSUM * img_sqsums_c1, + __global const TYPE_IMAGE_SQSUM * img_sqsums_c2, + __global const TYPE_IMAGE_SQSUM * img_sqsums_c3, + int img_sqsums_offset, + int img_sqsums_step, + float tpl_sum_c0, + float tpl_sum_c1, + float tpl_sum_c2, + float tpl_sum_c3, + float tpl_sqsum +) +{ + int gidx = get_global_id(0); + int gidy = get_global_id(1); + + img_sqsums_step /= sizeof(*img_sqsums_c0); + img_sqsums_offset /= sizeof(*img_sqsums_c0); + img_sums_offset /= sizeof(*img_sums_c0); + img_sums_step /= sizeof(*img_sums_c0); + res_step /= sizeof(*res); + res_offset /= sizeof(*res); + + int res_idx = mad24(gidy, res_step, res_offset + gidx); + + if(gidx < res_cols && gidy < res_rows) + { + float image_sum_c0 = (float)( + (img_sums_c0[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c0[SUMS_PTR(tpl_cols, 0)]) + - (img_sums_c0[SUMS_PTR(0, tpl_rows)] - img_sums_c0[SUMS_PTR(0, 0)])); + float image_sum_c1 = (float)( + (img_sums_c1[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c1[SUMS_PTR(tpl_cols, 0)]) + - (img_sums_c1[SUMS_PTR(0, tpl_rows)] - img_sums_c1[SUMS_PTR(0, 0)])); + float image_sum_c2 = (float)( + (img_sums_c2[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c2[SUMS_PTR(tpl_cols, 0)]) + - (img_sums_c2[SUMS_PTR(0, tpl_rows)] - img_sums_c2[SUMS_PTR(0, 0)])); + float image_sum_c3 = (float)( + (img_sums_c3[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c3[SUMS_PTR(tpl_cols, 0)]) + - (img_sums_c3[SUMS_PTR(0, tpl_rows)] - img_sums_c3[SUMS_PTR(0, 0)])); + + float image_sqsum_c0 = (float)( + (img_sqsums_c0[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c0[SQSUMS_PTR(tpl_cols, 0)]) - + (img_sqsums_c0[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c0[SQSUMS_PTR(0, 0)])); + float image_sqsum_c1 = (float)( + (img_sqsums_c1[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c1[SQSUMS_PTR(tpl_cols, 0)]) - + (img_sqsums_c1[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c1[SQSUMS_PTR(0, 0)])); + float image_sqsum_c2 = (float)( + (img_sqsums_c2[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c2[SQSUMS_PTR(tpl_cols, 0)]) - + (img_sqsums_c2[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c2[SQSUMS_PTR(0, 0)])); + float image_sqsum_c3 = (float)( + (img_sqsums_c3[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c3[SQSUMS_PTR(tpl_cols, 0)]) - + (img_sqsums_c3[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c3[SQSUMS_PTR(0, 0)])); + + float num = res[res_idx] - + image_sum_c0 * tpl_sum_c0 - + image_sum_c1 * tpl_sum_c1 - + image_sum_c2 * tpl_sum_c2 - + image_sum_c3 * tpl_sum_c3; + float denum = sqrt( tpl_sqsum * ( + image_sqsum_c0 - weight * image_sum_c0 * image_sum_c0 + + image_sqsum_c1 - weight * image_sum_c1 * image_sum_c1 + + image_sqsum_c2 - weight * image_sum_c2 * image_sum_c2 + + image_sqsum_c3 - weight * image_sum_c0 * image_sum_c3) + ); + res[res_idx] = normAcc(num, denum); + } +} + diff --git a/modules/ocl/src/kernels/pyr_down.cl b/modules/ocl/src/kernels/pyr_down.cl new file mode 100644 index 000000000..38b4ec7c7 --- /dev/null +++ b/modules/ocl/src/kernels/pyr_down.cl @@ -0,0 +1,500 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Dachuan Zhao, dachuan@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#pragma OPENCL EXTENSION cl_amd_printf : enable + + +uchar round_uchar_uchar(uchar v) +{ + return v; +} + +uchar round_uchar_int(int v) +{ + return (uchar)((uint)v <= 255 ? v : v > 0 ? 255 : 0); +} + +uchar round_uchar_float(float v) +{ + if(v - convert_int_sat_rte(v) > 1e-6 || v - convert_int_sat_rte(v) < -1e-6) + { + if(((int)v + 1) - (v + 0.5f) < 1e-6 && ((int)v + 1) - (v + 0.5f) > -1e-6) + { + v = (int)v + 0.51f; + } + } + int iv = convert_int_sat_rte(v); + return round_uchar_int(iv); +} + +uchar4 round_uchar4_uchar4(uchar4 v) +{ + return v; +} + +uchar4 round_uchar4_int4(int4 v) +{ + uchar4 result; + result.x = (uchar)(v.x <= 255 ? v.x : v.x > 0 ? 255 : 0); + result.y = (uchar)(v.y <= 255 ? v.y : v.y > 0 ? 255 : 0); + result.z = (uchar)(v.z <= 255 ? v.z : v.z > 0 ? 255 : 0); + result.w = (uchar)(v.w <= 255 ? v.w : v.w > 0 ? 255 : 0); + return result; +} + +uchar4 round_uchar4_float4(float4 v) +{ + if(v.x - convert_int_sat_rte(v.x) > 1e-6 || v.x - convert_int_sat_rte(v.x) < -1e-6) + { + if(((int)(v.x) + 1) - (v.x + 0.5f) < 1e-6 && ((int)(v.x) + 1) - (v.x + 0.5f) > -1e-6) + { + v.x = (int)(v.x) + 0.51f; + } + } + if(v.y - convert_int_sat_rte(v.y) > 1e-6 || v.y - convert_int_sat_rte(v.y) < -1e-6) + { + if(((int)(v.y) + 1) - (v.y + 0.5f) < 1e-6 && ((int)(v.y) + 1) - (v.y + 0.5f) > -1e-6) + { + v.y = (int)(v.y) + 0.51f; + } + } + if(v.z - convert_int_sat_rte(v.z) > 1e-6 || v.z - convert_int_sat_rte(v.z) < -1e-6) + { + if(((int)(v.z) + 1) - (v.z + 0.5f) < 1e-6 && ((int)(v.z) + 1) - (v.z + 0.5f) > -1e-6) + { + v.z = (int)(v.z) + 0.51f; + } + } + if(v.w - convert_int_sat_rte(v.w) > 1e-6 || v.w - convert_int_sat_rte(v.w) < -1e-6) + { + if(((int)(v.w) + 1) - (v.w + 0.5f) < 1e-6 && ((int)(v.w) + 1) - (v.w + 0.5f) > -1e-6) + { + v.w = (int)(v.w) + 0.51f; + } + } + int4 iv = convert_int4_sat_rte(v); + return round_uchar4_int4(iv); +} + + + + +int idx_row_low(int y, int last_row) +{ + if(y < 0) + { + y = -y; + } + return y % (last_row + 1); +} + +int idx_row_high(int y, int last_row) +{ + int i; + int j; + if(last_row - y < 0) + { + i = (y - last_row); + } + else + { + i = (last_row - y); + } + if(last_row - i < 0) + { + j = i - last_row; + } + else + { + j = last_row - i; + } + return j % (last_row + 1); +} + +int idx_row(int y, int last_row) +{ + return idx_row_low(idx_row_high(y, last_row), last_row); +} + +int idx_col_low(int x, int last_col) +{ + if(x < 0) + { + x = -x; + } + return x % (last_col + 1); +} + +int idx_col_high(int x, int last_col) +{ + int i; + int j; + if(last_col - x < 0) + { + i = (x - last_col); + } + else + { + i = (last_col - x); + } + if(last_col - i < 0) + { + j = i - last_col; + } + else + { + j = last_col - i; + } + return j % (last_col + 1); +} + +int idx_col(int x, int last_col) +{ + return idx_col_low(idx_col_high(x, last_col), last_col); +} + +__kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcOffset, int srcRows, int srcCols, __global uchar *dst, int dstStep, int dstOffset, int dstCols) +{ + const int x = get_group_id(0) * get_local_size(0) + get_local_id(0); + const int y = get_group_id(1); + + __local float smem[256 + 4]; + + float sum; + + const int src_y = 2*y; + const int last_row = srcRows - 1; + const int last_col = srcCols - 1; + + sum = 0; + + sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(x, last_col)]); + sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(x, last_col)]); + sum = sum + 0.375f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(x, last_col)]); + sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(x, last_col)]); + sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(x, last_col)]); + + smem[2 + get_local_id(0)] = sum; + + if (get_local_id(0) < 2) + { + const int left_x = x - 2; + + sum = 0; + + sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(left_x, last_col)]); + sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(left_x, last_col)]); + sum = sum + 0.375f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(left_x, last_col)]); + sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(left_x, last_col)]); + sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(left_x, last_col)]); + + smem[get_local_id(0)] = sum; + } + + if (get_local_id(0) > 253) + { + const int right_x = x + 2; + + sum = 0; + + sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(right_x, last_col)]); + sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(right_x, last_col)]); + sum = sum + 0.375f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(right_x, last_col)]); + sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(right_x, last_col)]); + sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(right_x, last_col)]); + + smem[4 + get_local_id(0)] = sum; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (get_local_id(0) < 128) + { + const int tid2 = get_local_id(0) * 2; + + sum = 0; + + sum = sum + 0.0625f * smem[2 + tid2 - 2]; + sum = sum + 0.25f * smem[2 + tid2 - 1]; + sum = sum + 0.375f * smem[2 + tid2 ]; + sum = sum + 0.25f * smem[2 + tid2 + 1]; + sum = sum + 0.0625f * smem[2 + tid2 + 2]; + + const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2; + + if (dst_x < dstCols) + dst[y * dstStep + dst_x] = round_uchar_float(sum); + } +} + +__kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcOffset, int srcRows, int srcCols, __global uchar4 *dst, int dstStep, int dstOffset, int dstCols) +{ + const int x = get_group_id(0) * get_local_size(0) + get_local_id(0); + const int y = get_group_id(1); + + __local float4 smem[256 + 4]; + + float4 sum; + + const int src_y = 2*y; + const int last_row = srcRows - 1; + const int last_col = srcCols - 1; + + float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f); + float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f); + float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f); + + sum = 0; + + sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(x, last_col)])); + sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(x, last_col)])); + sum = sum + co1 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(x, last_col)])); + sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(x, last_col)])); + sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(x, last_col)])); + + smem[2 + get_local_id(0)] = sum; + + if (get_local_id(0) < 2) + { + const int left_x = x - 2; + + sum = 0; + + sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)])); + sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)])); + sum = sum + co1 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(left_x, last_col)])); + sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)])); + sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)])); + + smem[get_local_id(0)] = sum; + } + + if (get_local_id(0) > 253) + { + const int right_x = x + 2; + + sum = 0; + + sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)])); + sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)])); + sum = sum + co1 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(right_x, last_col)])); + sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)])); + sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)])); + + smem[4 + get_local_id(0)] = sum; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (get_local_id(0) < 128) + { + const int tid2 = get_local_id(0) * 2; + + sum = 0; + + sum = sum + co3 * smem[2 + tid2 - 2]; + sum = sum + co2 * smem[2 + tid2 - 1]; + sum = sum + co1 * smem[2 + tid2 ]; + sum = sum + co2 * smem[2 + tid2 + 1]; + sum = sum + co3 * smem[2 + tid2 + 2]; + + const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2; + + if (dst_x < dstCols) + dst[y * dstStep / 4 + dst_x] = round_uchar4_float4(sum); + } +} + +__kernel void pyrDown_C1_D5(__global float * srcData, int srcStep, int srcOffset, int srcRows, int srcCols, __global float *dst, int dstStep, int dstOffset, int dstCols) +{ + const int x = get_group_id(0) * get_local_size(0) + get_local_id(0); + const int y = get_group_id(1); + + __local float smem[256 + 4]; + + float sum; + + const int src_y = 2*y; + const int last_row = srcRows - 1; + const int last_col = srcCols - 1; + + sum = 0; + + sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(x, last_col)]; + sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(x, last_col)]; + sum = sum + 0.375f * ((__global float*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(x, last_col)]; + sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(x, last_col)]; + sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(x, last_col)]; + + smem[2 + get_local_id(0)] = sum; + + if (get_local_id(0) < 2) + { + const int left_x = x - 2; + + sum = 0; + + sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(left_x, last_col)]; + sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(left_x, last_col)]; + sum = sum + 0.375f * ((__global float*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(left_x, last_col)]; + sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(left_x, last_col)]; + sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(left_x, last_col)]; + + smem[get_local_id(0)] = sum; + } + + if (get_local_id(0) > 253) + { + const int right_x = x + 2; + + sum = 0; + + sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(right_x, last_col)]; + sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(right_x, last_col)]; + sum = sum + 0.375f * ((__global float*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(right_x, last_col)]; + sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(right_x, last_col)]; + sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(right_x, last_col)]; + + smem[4 + get_local_id(0)] = sum; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (get_local_id(0) < 128) + { + const int tid2 = get_local_id(0) * 2; + + sum = 0; + + sum = sum + 0.0625f * smem[2 + tid2 - 2]; + sum = sum + 0.25f * smem[2 + tid2 - 1]; + sum = sum + 0.375f * smem[2 + tid2 ]; + sum = sum + 0.25f * smem[2 + tid2 + 1]; + sum = sum + 0.0625f * smem[2 + tid2 + 2]; + + const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2; + + if (dst_x < dstCols) + dst[y * dstStep / 4 + dst_x] = sum; + } +} + +__kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcOffset, int srcRows, int srcCols, __global float4 *dst, int dstStep, int dstOffset, int dstCols) +{ + const int x = get_group_id(0) * get_local_size(0) + get_local_id(0); + const int y = get_group_id(1); + + __local float4 smem[256 + 4]; + + float4 sum; + + const int src_y = 2*y; + const int last_row = srcRows - 1; + const int last_col = srcCols - 1; + + float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f); + float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f); + float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f); + + sum = 0; + + sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(x, last_col)]; + sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(x, last_col)]; + sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(x, last_col)]; + sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(x, last_col)]; + sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(x, last_col)]; + + smem[2 + get_local_id(0)] = sum; + + if (get_local_id(0) < 2) + { + const int left_x = x - 2; + + sum = 0; + + sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)]; + sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)]; + sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(left_x, last_col)]; + sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)]; + sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)]; + + smem[get_local_id(0)] = sum; + } + + if (get_local_id(0) > 253) + { + const int right_x = x + 2; + + sum = 0; + + sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)]; + sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)]; + sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(right_x, last_col)]; + sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)]; + sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)]; + + smem[4 + get_local_id(0)] = sum; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (get_local_id(0) < 128) + { + const int tid2 = get_local_id(0) * 2; + + sum = 0; + + sum = sum + co3 * smem[2 + tid2 - 2]; + sum = sum + co2 * smem[2 + tid2 - 1]; + sum = sum + co1 * smem[2 + tid2 ]; + sum = sum + co2 * smem[2 + tid2 + 1]; + sum = sum + co3 * smem[2 + tid2 + 2]; + + const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2; + + if (dst_x < dstCols) + dst[y * dstStep / 16 + dst_x] = sum; + } +} diff --git a/modules/ocl/src/kernels/pyr_up.cl b/modules/ocl/src/kernels/pyr_up.cl new file mode 100644 index 000000000..dd3ba43d1 --- /dev/null +++ b/modules/ocl/src/kernels/pyr_up.cl @@ -0,0 +1,750 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Zhang Chunpeng chunpeng@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +//#pragma OPENCL EXTENSION cl_amd_printf : enable + +uchar get_valid_uchar(uchar data) +{ + return (uchar)(data <= 255 ? data : data > 0 ? 255 : 0); +} +/////////////////////////////////////////////////////////////////////// +////////////////////////// CV_8UC1 ////////////////////////////////// +/////////////////////////////////////////////////////////////////////// +__kernel void pyrUp_C1_D0(__global uchar* src,__global uchar* dst, + int srcRows,int dstRows,int srcCols,int dstCols, + int srcOffset,int dstOffset,int srcStep,int dstStep) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + __local float s_srcPatch[10][10]; + __local float s_dstPatch[20][16]; + + + if( get_local_id(0) < 10 && get_local_id(1) < 10 ) + { + int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1; + int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1; + + srcx = abs(srcx); + srcx = min(srcCols - 1,srcx); + + srcy = abs(srcy); + srcy = min(srcRows -1 ,srcy); + + s_srcPatch[get_local_id(1)][get_local_id(0)] = (float)(src[srcx + srcy * srcStep]); + + } + + barrier(CLK_LOCAL_MEM_FENCE); + + float sum = 0; + const int evenFlag = (int)((get_local_id(0) & 1) == 0); + const int oddFlag = (int)((get_local_id(0) & 1) != 0); + const bool eveny = ((get_local_id(1) & 1) == 0); + const int tidx = get_local_id(0); + + if(eveny) + { + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)]; + sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)]; + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)]; + } + + s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum; + + if (get_local_id(1) < 2) + { + sum = 0; + + if (eveny) + { + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)]; + sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx ) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)]; + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)]; + } + + s_dstPatch[get_local_id(1)][get_local_id(0)] = sum; + } + + if (get_local_id(1) > 13) + { + sum = 0; + + if (eveny) + { + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)]; + sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx ) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)]; + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)]; + } + s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + sum = 0; + + const int tidy = get_local_id(1); + + sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][get_local_id(0)]; + sum = sum + 0.25f * s_dstPatch[2 + tidy - 1][get_local_id(0)]; + sum = sum + 0.375f * s_dstPatch[2 + tidy ][get_local_id(0)]; + sum = sum + 0.25f * s_dstPatch[2 + tidy + 1][get_local_id(0)]; + sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)]; + + if ((x < dstCols) && (y < dstRows)) + dst[x + y * dstStep] = (float)(4.0f * sum); + +} + +/////////////////////////////////////////////////////////////////////// +////////////////////////// CV_16UC1 ///////////////////////////////// +/////////////////////////////////////////////////////////////////////// +__kernel void pyrUp_C1_D2(__global ushort* src,__global ushort* dst, + int srcRows,int dstRows,int srcCols,int dstCols, + int srcOffset,int dstOffset,int srcStep,int dstStep) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + __local float s_srcPatch[10][10]; + __local float s_dstPatch[20][16]; + + srcStep = srcStep >> 1; + dstStep = dstStep >> 1; + srcOffset = srcOffset >> 1; + dstOffset = dstOffset >> 1; + + + if( get_local_id(0) < 10 && get_local_id(1) < 10 ) + { + int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1; + int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1; + + srcx = abs(srcx); + srcx = min(srcCols - 1,srcx); + + srcy = abs(srcy); + srcy = min(srcRows -1 ,srcy); + + s_srcPatch[get_local_id(1)][get_local_id(0)] = (float)(src[srcx + srcy * srcStep]); + + } + + barrier(CLK_LOCAL_MEM_FENCE); + + float sum = 0; + + const int evenFlag = (int)((get_local_id(0) & 1) == 0); + const int oddFlag = (int)((get_local_id(0) & 1) != 0); + const bool eveny = ((get_local_id(1) & 1) == 0); + const int tidx = get_local_id(0); + + if(eveny) + { + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)]; + sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)]; + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)]; + } + + s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum; + + if (get_local_id(1) < 2) + { + sum = 0; + + if (eveny) + { + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)]; + sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx ) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)]; + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)]; + } + + s_dstPatch[get_local_id(1)][get_local_id(0)] = sum; + } + + if (get_local_id(1) > 13) + { + sum = 0; + + if (eveny) + { + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)]; + sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx ) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)]; + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)]; + } + s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + sum = 0; + + const int tidy = get_local_id(1); + + sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][get_local_id(0)]; + sum = sum + 0.25f * s_dstPatch[2 + tidy - 1][get_local_id(0)]; + sum = sum + 0.375f * s_dstPatch[2 + tidy ][get_local_id(0)]; + sum = sum + 0.25f * s_dstPatch[2 + tidy + 1][get_local_id(0)]; + sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)]; + + if ((x < dstCols) && (y < dstRows)) + dst[x + y * dstStep] = (float)(4.0f * sum); + +} + +/////////////////////////////////////////////////////////////////////// +////////////////////////// CV_32FC1 ///////////////////////////////// +/////////////////////////////////////////////////////////////////////// +__kernel void pyrUp_C1_D5(__global float* src,__global float* dst, + int srcRows,int dstRows,int srcCols,int dstCols, + int srcOffset,int dstOffset,int srcStep,int dstStep) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + __local float s_srcPatch[10][10]; + __local float s_dstPatch[20][16]; + + srcOffset = srcOffset >> 2; + dstOffset = dstOffset >> 2; + srcStep = srcStep >> 2; + dstStep = dstStep >> 2; + + + if( get_local_id(0) < 10 && get_local_id(1) < 10 ) + { + int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1; + int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1; + + srcx = abs(srcx); + srcx = min(srcCols - 1,srcx); + + srcy = abs(srcy); + srcy = min(srcRows -1 ,srcy); + + s_srcPatch[get_local_id(1)][get_local_id(0)] = (float)(src[srcx + srcy * srcStep]); + + } + + barrier(CLK_LOCAL_MEM_FENCE); + + float sum = 0; + const int evenFlag = (int)((get_local_id(0) & 1) == 0); + const int oddFlag = (int)((get_local_id(0) & 1) != 0); + const bool eveny = ((get_local_id(1) & 1) == 0); + const int tidx = get_local_id(0); + + if(eveny) + { + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)]; + sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)]; + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)]; + } + + s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum; + + if (get_local_id(1) < 2) + { + sum = 0; + + if (eveny) + { + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)]; + sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx ) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)]; + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)]; + } + + s_dstPatch[get_local_id(1)][get_local_id(0)] = sum; + } + + if (get_local_id(1) > 13) + { + sum = 0; + + if (eveny) + { + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)]; + sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx ) >> 1)]; + sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)]; + sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)]; + } + s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + sum = 0; + + const int tidy = get_local_id(1); + + sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][get_local_id(0)]; + sum = sum + 0.25f * s_dstPatch[2 + tidy - 1][get_local_id(0)]; + sum = sum + 0.375f * s_dstPatch[2 + tidy ][get_local_id(0)]; + sum = sum + 0.25f * s_dstPatch[2 + tidy + 1][get_local_id(0)]; + sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)]; + + if ((x < dstCols) && (y < dstRows)) + dst[x + y * dstStep] = (float)(4.0f * sum); + +} + +/////////////////////////////////////////////////////////////////////// +////////////////////////// CV_8UC4 ////////////////////////////////// +/////////////////////////////////////////////////////////////////////// +float4 covert_uchar4_to_float4(uchar4 data) +{ + float4 f4Data = {0,0,0,0}; + + f4Data.x = (float)data.x; + f4Data.y = (float)data.y; + f4Data.z = (float)data.z; + f4Data.w = (float)data.w; + + return f4Data; +} + + +uchar4 convert_float4_to_uchar4(float4 data) +{ + uchar4 u4Data; + + u4Data.x = get_valid_uchar(data.x); + u4Data.y = get_valid_uchar(data.y); + u4Data.z = get_valid_uchar(data.z); + u4Data.w = get_valid_uchar(data.w); + + return u4Data; +} + +float4 int_x_float4(int leftOpr,float4 rightOpr) +{ + float4 result = {0,0,0,0}; + + result.x = rightOpr.x * leftOpr; + result.y = rightOpr.y * leftOpr; + result.z = rightOpr.z * leftOpr; + result.w = rightOpr.w * leftOpr; + + return result; +} + +float4 float4_x_float4(float4 leftOpr,float4 rightOpr) +{ + float4 result; + + result.x = leftOpr.x * rightOpr.x; + result.y = leftOpr.y * rightOpr.y; + result.z = leftOpr.z * rightOpr.z; + result.w = leftOpr.w * rightOpr.w; + + return result; +} + +__kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst, + int srcRows,int dstRows,int srcCols,int dstCols, + int srcOffset,int dstOffset,int srcStep,int dstStep) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + __local float4 s_srcPatch[10][10]; + __local float4 s_dstPatch[20][16]; + + srcOffset >>= 2; + dstOffset >>= 2; + srcStep >>= 2; + dstStep >>= 2; + + + if( get_local_id(0) < 10 && get_local_id(1) < 10 ) + { + int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1; + int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1; + + srcx = abs(srcx); + srcx = min(srcCols - 1,srcx); + + srcy = abs(srcy); + srcy = min(srcRows -1 ,srcy); + + s_srcPatch[get_local_id(1)][get_local_id(0)] = covert_uchar4_to_float4(src[srcx + srcy * srcStep]); + } + + barrier(CLK_LOCAL_MEM_FENCE); + + float4 sum = (float4)(0,0,0,0); + + const int evenFlag = (int)((get_local_id(0) & 1) == 0); + const int oddFlag = (int)((get_local_id(0) & 1) != 0); + const bool eveny = ((get_local_id(1) & 1) == 0); + const int tidx = get_local_id(0); + + float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f); + float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f); + float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f); + + + if(eveny) + { + sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( evenFlag, co1 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)]); + + } + + s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum; + + if (get_local_id(1) < 2) + { + sum = 0; + + if (eveny) + { + sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx - 2) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx - 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4(evenFlag , co1 ) , s_srcPatch[0][1 + ((tidx ) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx + 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx + 2) >> 1)]); + } + + s_dstPatch[get_local_id(1)][get_local_id(0)] = sum; + } + + if (get_local_id(1) > 13) + { + sum = 0; + + if (eveny) + { + sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx - 2) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx - 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4(evenFlag , co1) , s_srcPatch[9][1 + ((tidx ) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx + 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx + 2) >> 1)]); + + } + s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + sum = 0; + + const int tidy = get_local_id(1); + + sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy - 2][get_local_id(0)]); + sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy - 1][get_local_id(0)]); + sum = sum + float4_x_float4(co1 , s_dstPatch[2 + tidy ][get_local_id(0)]); + sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy + 1][get_local_id(0)]); + sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy + 2][get_local_id(0)]); + + if ((x < dstCols) && (y < dstRows)) + { + dst[x + y * dstStep] = convert_float4_to_uchar4(int_x_float4(4.0f,sum)); + } +} +/////////////////////////////////////////////////////////////////////// +////////////////////////// CV_16UC4 ////////////////////////////////// +/////////////////////////////////////////////////////////////////////// +float4 covert_ushort4_to_float4(ushort4 data) +{ + float4 f4Data = {0,0,0,0}; + + f4Data.x = (float)data.x; + f4Data.y = (float)data.y; + f4Data.z = (float)data.z; + f4Data.w = (float)data.w; + + return f4Data; +} + + +ushort4 convert_float4_to_ushort4(float4 data) +{ + ushort4 u4Data; + + u4Data.x = (float)data.x; + u4Data.y = (float)data.y; + u4Data.z = (float)data.z; + u4Data.w = (float)data.w; + + return u4Data; +} + + +__kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst, + int srcRows,int dstRows,int srcCols,int dstCols, + int srcOffset,int dstOffset,int srcStep,int dstStep) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + __local float4 s_srcPatch[10][10]; + __local float4 s_dstPatch[20][16]; + + srcOffset >>= 3; + dstOffset >>= 3; + srcStep >>= 3; + dstStep >>= 3; + + + if( get_local_id(0) < 10 && get_local_id(1) < 10 ) + { + int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1; + int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1; + + srcx = abs(srcx); + srcx = min(srcCols - 1,srcx); + + srcy = abs(srcy); + srcy = min(srcRows -1 ,srcy); + + s_srcPatch[get_local_id(1)][get_local_id(0)] = covert_ushort4_to_float4(src[srcx + srcy * srcStep]); + } + + barrier(CLK_LOCAL_MEM_FENCE); + + float4 sum = (float4)(0,0,0,0); + + const int evenFlag = (int)((get_local_id(0) & 1) == 0); + const int oddFlag = (int)((get_local_id(0) & 1) != 0); + const bool eveny = ((get_local_id(1) & 1) == 0); + const int tidx = get_local_id(0); + + float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f); + float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f); + float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f); + + + if(eveny) + { + sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( evenFlag, co1 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)]); + + } + + s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum; + + if (get_local_id(1) < 2) + { + sum = 0; + + if (eveny) + { + sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx - 2) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx - 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4(evenFlag , co1 ) , s_srcPatch[0][1 + ((tidx ) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx + 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx + 2) >> 1)]); + } + + s_dstPatch[get_local_id(1)][get_local_id(0)] = sum; + } + + if (get_local_id(1) > 13) + { + sum = 0; + + if (eveny) + { + sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx - 2) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx - 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4(evenFlag , co1) , s_srcPatch[9][1 + ((tidx ) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx + 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx + 2) >> 1)]); + + } + s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + sum = 0; + + const int tidy = get_local_id(1); + + sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy - 2][get_local_id(0)]); + sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy - 1][get_local_id(0)]); + sum = sum + float4_x_float4(co1 , s_dstPatch[2 + tidy ][get_local_id(0)]); + sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy + 1][get_local_id(0)]); + sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy + 2][get_local_id(0)]); + + if ((x < dstCols) && (y < dstRows)) + { + dst[x + y * dstStep] = convert_float4_to_ushort4(int_x_float4(4.0f,sum)); + } +} + +/////////////////////////////////////////////////////////////////////// +////////////////////////// CV_32FC4 ////////////////////////////////// +/////////////////////////////////////////////////////////////////////// +__kernel void pyrUp_C4_D5(__global float4* src,__global float4* dst, + int srcRows,int dstRows,int srcCols,int dstCols, + int srcOffset,int dstOffset,int srcStep,int dstStep) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + __local float4 s_srcPatch[10][10]; + __local float4 s_dstPatch[20][16]; + + srcOffset >>= 4; + dstOffset >>= 4; + srcStep >>= 4; + dstStep >>= 4; + + + if( get_local_id(0) < 10 && get_local_id(1) < 10 ) + { + int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1; + int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1; + + srcx = abs(srcx); + srcx = min(srcCols - 1,srcx); + + srcy = abs(srcy); + srcy = min(srcRows -1 ,srcy); + + s_srcPatch[get_local_id(1)][get_local_id(0)] = (float4)(src[srcx + srcy * srcStep]); + } + + barrier(CLK_LOCAL_MEM_FENCE); + + float4 sum = (float4)(0,0,0,0); + + const int evenFlag = (int)((get_local_id(0) & 1) == 0); + const int oddFlag = (int)((get_local_id(0) & 1) != 0); + const bool eveny = ((get_local_id(1) & 1) == 0); + const int tidx = get_local_id(0); + + float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f); + float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f); + float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f); + + + if(eveny) + { + sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( evenFlag, co1 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)]); + + } + + s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum; + + if (get_local_id(1) < 2) + { + sum = 0; + + if (eveny) + { + sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx - 2) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx - 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4(evenFlag , co1 ) , s_srcPatch[0][1 + ((tidx ) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx + 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx + 2) >> 1)]); + } + + s_dstPatch[get_local_id(1)][get_local_id(0)] = sum; + } + + if (get_local_id(1) > 13) + { + sum = 0; + + if (eveny) + { + sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx - 2) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx - 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4(evenFlag , co1) , s_srcPatch[9][1 + ((tidx ) >> 1)]); + sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx + 1) >> 1)]); + sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx + 2) >> 1)]); + + } + s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + sum = 0; + + const int tidy = get_local_id(1); + + sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy - 2][get_local_id(0)]); + sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy - 1][get_local_id(0)]); + sum = sum + float4_x_float4(co1 , s_dstPatch[2 + tidy ][get_local_id(0)]); + sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy + 1][get_local_id(0)]); + sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy + 2][get_local_id(0)]); + + if ((x < dstCols) && (y < dstRows)) + { + dst[x + y * dstStep] = 4.0f * sum; + } +} \ No newline at end of file diff --git a/modules/ocl/src/match_template.cpp b/modules/ocl/src/match_template.cpp new file mode 100644 index 000000000..ad31b00c6 --- /dev/null +++ b/modules/ocl/src/match_template.cpp @@ -0,0 +1,560 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + + +#include +#include "precomp.hpp" + +using namespace cv; +using namespace cv::ocl; +using namespace std; + +#define EXT_FP64 0 + +#if !defined (HAVE_OPENCL) +void cv::ocl::matchTemplate(const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); } +#else +//helper routines +namespace cv +{ + namespace ocl + { + ///////////////////////////OpenCL kernel strings/////////////////////////// + extern const char *match_template; + } +} + +namespace cv { namespace ocl +{ + void matchTemplate_SQDIFF( + const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf); + + void matchTemplate_SQDIFF_NORMED( + const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf); + + void matchTemplate_CCORR( + const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf); + + void matchTemplate_CCORR_NORMED( + const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf); + + void matchTemplate_CCOFF( + const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf); + + void matchTemplate_CCOFF_NORMED( + const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf); + + + void matchTemplateNaive_SQDIFF( + const oclMat& image, const oclMat& templ, oclMat& result, int cn); + + void matchTemplateNaive_CCORR( + const oclMat& image, const oclMat& templ, oclMat& result, int cn); + + // Evaluates optimal template's area threshold. If + // template's area is less than the threshold, we use naive match + // template version, otherwise FFT-based (if available) + int getTemplateThreshold(int method, int depth) + { + switch (method) + { + case CV_TM_CCORR: + if (depth == CV_32F) return 250; + if (depth == CV_8U) return 300; + break; + case CV_TM_SQDIFF: + if (depth == CV_32F) return MAXSHORT; // do naive SQDIFF for CV_32F + if (depth == CV_8U) return 300; + break; + } + CV_Error(CV_StsBadArg, "getTemplateThreshold: unsupported match template mode"); + return 0; + } + + + ////////////////////////////////////////////////////////////////////// + // SQDIFF + void matchTemplate_SQDIFF( + const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) + { + result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F); + if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth())) + { + matchTemplateNaive_SQDIFF(image, templ, result, image.channels()); + return; + } + else + { + // TODO + CV_Error(CV_StsBadArg, "Not supported yet for this size template"); + } + } + + void matchTemplate_SQDIFF_NORMED( + const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) + { + matchTemplate_CCORR(image,templ,result,buf); + buf.image_sums.resize(1); + buf.image_sqsums.resize(1); + + integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]); + +#if EXT_FP64 && SQRSUM_FIXED + unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0]; +#else + Mat sqr_mat = templ.reshape(1); + unsigned long long templ_sqsum = (unsigned long long)sum(sqr_mat.mul(sqr_mat))[0]; +#endif + + Context *clCxt = image.clCxt; + string kernelName = "matchTemplate_Prepared_SQDIFF_NORMED"; + vector< pair > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data)); + args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); + + size_t globalThreads[3] = {result.cols, result.rows, 1}; + size_t localThreads[3] = {32, 8, 1}; + openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U); + } + + void matchTemplateNaive_SQDIFF( + const oclMat& image, const oclMat& templ, oclMat& result, int cn) + { + CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U ) + || (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F); + CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1); + CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1); + + Context *clCxt = image.clCxt; + string kernelName = "matchTemplate_Naive_SQDIFF"; + + vector< pair > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); + + size_t globalThreads[3] = {result.cols, result.rows, 1}; + size_t localThreads[3] = {32, 8, 1}; + openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth()); + } + + ////////////////////////////////////////////////////////////////////// + // CCORR + void matchTemplate_CCORR( + const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) + { + result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F); + if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth())) + { + matchTemplateNaive_CCORR(image, templ, result, image.channels()); + return; + } + else + { + CV_Error(CV_StsBadArg, "Not supported yet for this size template"); + if(image.depth() == CV_8U && templ.depth() == CV_8U) + { + image.convertTo(buf.imagef, CV_32F); + templ.convertTo(buf.templf, CV_32F); + } + CV_Assert(image.channels() == 1); + oclMat o_result(image.size(), CV_MAKETYPE(CV_32F, image.channels())); + filter2D(buf.imagef,o_result,CV_32F,buf.templf, Point(0,0)); + result = o_result(Rect(0,0,image.rows - templ.rows + 1, image.cols - templ.cols + 1)); + } + } + + void matchTemplate_CCORR_NORMED( + const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) + { + matchTemplate_CCORR(image,templ,result,buf); + buf.image_sums.resize(1); + buf.image_sqsums.resize(1); + + integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]); +#if EXT_FP64 && SQRSUM_FIXED + unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0]; +#elif EXT_FP64 + oclMat templ_c1 = templ.reshape(1); + multiply(templ_c1, templ_c1, templ_c1); + unsigned long long templ_sqsum = (unsigned long long)sum(templ_c1)[0]; +#else + Mat m_templ_c1 = templ.reshape(1); + multiply(m_templ_c1, m_templ_c1, m_templ_c1); + unsigned long long templ_sqsum = (unsigned long long)sum(m_templ_c1)[0]; +#endif + Context *clCxt = image.clCxt; + string kernelName = "normalizeKernel"; + vector< pair > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data)); + args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); + + size_t globalThreads[3] = {result.cols, result.rows, 1}; + size_t localThreads[3] = {32, 8, 1}; + openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U); + } + + void matchTemplateNaive_CCORR( + const oclMat& image, const oclMat& templ, oclMat& result, int cn) + { + CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U ) + || (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F); + CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1); + CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1); + + Context *clCxt = image.clCxt; + string kernelName = "matchTemplate_Naive_CCORR"; + + vector< pair > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); + + size_t globalThreads[3] = {result.cols, result.rows, 1}; + size_t localThreads[3] = {32, 8, 1}; + openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth()); + } + ////////////////////////////////////////////////////////////////////// + // CCOFF + void matchTemplate_CCOFF( + const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) + { + CV_Assert(image.depth() == CV_8U && templ.depth() == CV_8U); + + matchTemplate_CCORR(image,templ,result,buf); + + Context *clCxt = image.clCxt; + string kernelName; + + kernelName = "matchTemplate_Prepared_CCOFF"; + size_t globalThreads[3] = {result.cols, result.rows, 1}; + size_t localThreads[3] = {32, 8, 1}; + + vector< pair > args; + args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); + // to be continued in the following section + if(image.channels() == 1) + { + buf.image_sums.resize(1); + // FIXME: temp fix for incorrect integral kernel + oclMat tmp_oclmat; + integral(image, buf.image_sums[0], tmp_oclmat); + + float templ_sum = 0; +#if EXT_FP64 + templ_sum = (float)sum(templ)[0] / templ.size().area(); +#else + Mat o_templ = templ; + templ_sum = (float)sum(o_templ)[0] / o_templ.size().area(); // temp fix for non-double supported machine +#endif + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); + args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) ); + } + else + { + Vec4f templ_sum = Vec4f::all(0); +#if EXT_FP64 + split(image,buf.images); + templ_sum = sum(templ) / templ.size().area(); +#else + // temp fix for non-double supported machine + Mat o_templ = templ, o_image = image; + vector o_mat_vector; + o_mat_vector.resize(image.channels()); + buf.images.resize(image.channels()); + split(o_image, o_mat_vector); + for(int i = 0; i < o_mat_vector.size(); i ++) + { + buf.images[i] = oclMat(o_mat_vector[i]); + } + templ_sum = sum(o_templ) / templ.size().area(); +#endif + buf.image_sums.resize(buf.images.size()); + + for(int i = 0; i < image.channels(); i ++) + { + // FIXME: temp fix for incorrect integral kernel + oclMat omat_temp; + integral(buf.images[i], buf.image_sums[i], omat_temp); + } + switch(image.channels()) + { + case 4: + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); + args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) ); + args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) ); + args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) ); + args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) ); + break; + default: + CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels"); + break; + } + } + openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth()); + } + + void matchTemplate_CCOFF_NORMED( + const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) + { + image.convertTo(buf.imagef, CV_32F); + templ.convertTo(buf.templf, CV_32F); + + matchTemplate_CCORR(buf.imagef, buf.templf, result, buf); + float scale = 1.f/templ.size().area(); + + Context *clCxt = image.clCxt; + string kernelName; + + kernelName = "matchTemplate_Prepared_CCOFF_NORMED"; + size_t globalThreads[3] = {result.cols, result.rows, 1}; + size_t localThreads[3] = {32, 8, 1}; + + vector< pair > args; + args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); + args.push_back( make_pair( sizeof(cl_float),(void *)&scale) ); + // to be continued in the following section + if(image.channels() == 1) + { + buf.image_sums.resize(1); + buf.image_sqsums.resize(1); + integral(image, buf.image_sums[0], buf.image_sqsums[0]); + float templ_sum = 0; + float templ_sqsum = 0; +#if EXT_FP64 + templ_sum = (float)sum(templ)[0]; +#if SQRSUM_FIXED + templ_sqsum = sqrSum(templ); +#else + oclMat templ_sqr = templ; + multiply(templ,templ, templ_sqr); + templ_sqsum = sum(templ_sqr)[0]; +#endif //SQRSUM_FIXED + templ_sqsum -= scale * templ_sum * templ_sum; + templ_sum *= scale; +#else + // temp fix for non-double supported machine + Mat o_templ = templ; + templ_sum = (float)sum(o_templ)[0]; + templ_sqsum = sum(o_templ.mul(o_templ))[0] - scale * templ_sum * templ_sum; + templ_sum *= scale; +#endif + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) ); + args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) ); + args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum) ); + } + else + { + Vec4f templ_sum = Vec4f::all(0); + Vec4f templ_sqsum = Vec4f::all(0); +#if EXT_FP64 + split(image,buf.images); + templ_sum = sum(templ); +#if SQRSUM_FIXED + templ_sqsum = sqrSum(templ); +#else + oclMat templ_sqr = templ; + multiply(templ,templ, templ_sqr); + templ_sqsum = sum(templ_sqr); +#endif //SQRSUM_FIXED + templ_sqsum -= scale * templ_sum * templ_sum; + +#else + // temp fix for non-double supported machine + Mat o_templ = templ, o_image = image; + + vector o_mat_vector; + o_mat_vector.resize(image.channels()); + buf.images.resize(image.channels()); + split(o_image, o_mat_vector); + for(int i = 0; i < o_mat_vector.size(); i ++) + { + buf.images[i] = oclMat(o_mat_vector[i]); + } + templ_sum = sum(o_templ); + templ_sqsum = sum(o_templ.mul(o_templ)); +#endif + float templ_sqsum_sum = 0; + for(int i = 0; i < image.channels(); i ++) + { + templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i]; + } + templ_sum *= scale; + buf.image_sums.resize(buf.images.size()); + buf.image_sqsums.resize(buf.images.size()); + + for(int i = 0; i < image.channels(); i ++) + { + integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]); + } + + switch(image.channels()) + { + case 4: + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[1].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[2].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[3].data) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) ); + args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) ); + args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) ); + args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) ); + args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) ); + args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum_sum) ); + break; + default: + CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels"); + break; + } + } + openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth()); + } + +}/*ocl*/} /*cv*/ + +void cv::ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method) +{ + MatchTemplateBuf buf; + matchTemplate(image,templ, result, method,buf); +} +void cv::ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method, MatchTemplateBuf& buf) +{ + CV_Assert(image.type() == templ.type()); + CV_Assert(image.cols >= templ.cols && image.rows >= templ.rows); + + typedef void (*Caller)(const oclMat&, const oclMat&, oclMat&, MatchTemplateBuf&); + + const Caller callers[] = { + ::matchTemplate_SQDIFF, ::matchTemplate_SQDIFF_NORMED, + ::matchTemplate_CCORR, ::matchTemplate_CCORR_NORMED, + ::matchTemplate_CCOFF, ::matchTemplate_CCOFF_NORMED + }; + + Caller caller = callers[method]; + CV_Assert(caller); + caller(image, templ, result, buf); +} +#endif // diff --git a/modules/ocl/src/pyrdown.cpp b/modules/ocl/src/pyrdown.cpp new file mode 100644 index 000000000..3f0a241cf --- /dev/null +++ b/modules/ocl/src/pyrdown.cpp @@ -0,0 +1,115 @@ + +#include "precomp.hpp" + +using namespace cv; +using namespace cv::ocl; +using namespace std; + +using std::cout; +using std::endl; + +namespace cv +{ + namespace ocl + { + ///////////////////////////OpenCL kernel strings/////////////////////////// + extern const char *pyr_down; + + } +} + +////////////////////////////////////////////////////////////////////////////// +/////////////////////// add subtract multiply divide ///////////////////////// +////////////////////////////////////////////////////////////////////////////// +template +void pyrdown_run(const oclMat &src, const oclMat &dst) +{ + CV_Assert(src.cols / 2 == dst.cols && src.rows / 2 == dst.rows); + + CV_Assert(src.type() == dst.type()); + CV_Assert(src.depth() != CV_8S); + + Context *clCxt = src.clCxt; + //int channels = dst.channels(); + //int depth = dst.depth(); + + string kernelName = "pyrDown"; + + //int vector_lengths[4][7] = {{4, 0, 4, 4, 1, 1, 1}, + // {4, 0, 4, 4, 1, 1, 1}, + // {4, 0, 4, 4, 1, 1, 1}, + // {4, 0, 4, 4, 1, 1, 1} + //}; + + //size_t vector_length = vector_lengths[channels-1][depth]; + //int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1); + + size_t localThreads[3] = { 256, 1, 1 }; + size_t globalThreads[3] = { src.cols, dst.rows, 1}; + + //int dst_step1 = dst.cols * dst.elemSize(); + vector > args; + args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols)); + + openCLExecuteKernel(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.channels(), src.depth()); +} +void pyrdown_run(const oclMat &src, const oclMat &dst) +{ + switch(src.depth()) + { + case 0: + pyrdown_run(src, dst); + break; + + case 1: + pyrdown_run(src, dst); + break; + + case 2: + pyrdown_run(src, dst); + break; + + case 3: + pyrdown_run(src, dst); + break; + + case 4: + pyrdown_run(src, dst); + break; + + case 5: + pyrdown_run(src, dst); + break; + + case 6: + pyrdown_run(src, dst); + break; + + default: + break; + } +} +////////////////////////////////////////////////////////////////////////////// +// pyrDown + +void cv::ocl::pyrDown(const oclMat& src, oclMat& dst) +{ + CV_Assert(src.depth() <= CV_32F && src.channels() <= 4); + + //src.step = src.rows; + + dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type()); + + //dst.step = dst.rows; + + pyrdown_run(src, dst); +} + diff --git a/modules/ocl/src/pyrup.cpp b/modules/ocl/src/pyrup.cpp new file mode 100644 index 000000000..ee0dfe382 --- /dev/null +++ b/modules/ocl/src/pyrup.cpp @@ -0,0 +1,88 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Zhang Chunpeng chunpeng@multicorewareinc.com +// +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +/* Haar features calculation */ +//#define EMU + +#include "precomp.hpp" + +using namespace cv; +using namespace cv::ocl; +using namespace std; + +#ifndef HAVE_OPENCL +void cv::ocl::pyrUp(const oclMat&, GpuMat&, oclMat&) { throw_nogpu(); } +#else + +namespace cv { namespace ocl +{ + extern const char *pyr_up; + void pyrUp(const cv::ocl::oclMat& src,cv::ocl::oclMat& dst) + { + dst.create(src.rows * 2, src.cols * 2, src.type()); + Context *clCxt = src.clCxt; + + const std::string kernelName = "pyrUp"; + + std::vector< pair > args; + args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step)); + + size_t globalThreads[3] = {dst.cols, dst.rows, 1}; + size_t localThreads[3] = {16, 16, 1}; + + openCLExecuteKernel(clCxt, &pyr_up, kernelName, globalThreads, localThreads, args, src.channels(), src.depth()); + } +}}; +#endif // HAVE_OPENCL \ No newline at end of file diff --git a/modules/ocl/test/test_blend.cpp b/modules/ocl/test/test_blend.cpp new file mode 100644 index 000000000..f1649bfe1 --- /dev/null +++ b/modules/ocl/test/test_blend.cpp @@ -0,0 +1,82 @@ +#include "precomp.hpp" +#include + +using namespace cv; +using namespace cv::ocl; +using namespace cvtest; +using namespace testing; +using namespace std; + +template +void blendLinearGold(const cv::Mat& img1, const cv::Mat& img2, const cv::Mat& weights1, const cv::Mat& weights2, cv::Mat& result_gold) +{ + result_gold.create(img1.size(), img1.type()); + + int cn = img1.channels(); + + for (int y = 0; y < img1.rows; ++y) + { + const float* weights1_row = weights1.ptr(y); + const float* weights2_row = weights2.ptr(y); + const T* img1_row = img1.ptr(y); + const T* img2_row = img2.ptr(y); + T* result_gold_row = result_gold.ptr(y); + + for (int x = 0; x < img1.cols * cn; ++x) + { + float w1 = weights1_row[x / cn]; + float w2 = weights2_row[x / cn]; + result_gold_row[x] = static_cast((img1_row[x] * w1 + img2_row[x] * w2) / (w1 + w2 + 1e-5f)); + } + } +} + +PARAM_TEST_CASE(Blend, cv::Size, MatType/*, UseRoi*/) +{ + //cv::gpu::DeviceInfo devInfo; + cv::Size size; + int type; + bool useRoi; + + virtual void SetUp() + { + //devInfo = GET_PARAM(0); + size = GET_PARAM(0); + type = GET_PARAM(1); + /*useRoi = GET_PARAM(3);*/ + + //cv::gpu::setDevice(devInfo.deviceID()); + } +}; + +TEST_P(Blend, Accuracy) +{ + int depth = CV_MAT_DEPTH(type); + + cv::Mat img1 = randomMat(size, type, 0.0, depth == CV_8U ? 255.0 : 1.0); + cv::Mat img2 = randomMat(size, type, 0.0, depth == CV_8U ? 255.0 : 1.0); + cv::Mat weights1 = randomMat(size, CV_32F, 0, 1); + cv::Mat weights2 = randomMat(size, CV_32F, 0, 1); + + cv::ocl::oclMat gimg1(size, type), gimg2(size, type), gweights1(size, CV_32F), gweights2(size, CV_32F); + cv::ocl::oclMat dst(size, type); + gimg1.upload(img1); + gimg2.upload(img2); + gweights1.upload(weights1); + gweights2.upload(weights2); + cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, dst); + cv::Mat result; + cv::Mat result_gold; + dst.download(result); + if (depth == CV_8U) + blendLinearGold(img1, img2, weights1, weights2, result_gold); + else + blendLinearGold(img1, img2, weights1, weights2, result_gold); + + EXPECT_MAT_NEAR(result_gold, result, CV_MAT_DEPTH(type) == CV_8U ? 1 : 1e-5f, NULL) +} + +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine( + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4)) +)); \ No newline at end of file diff --git a/modules/ocl/test/test_columnsum.cpp b/modules/ocl/test/test_columnsum.cpp new file mode 100644 index 000000000..fe73dca76 --- /dev/null +++ b/modules/ocl/test/test_columnsum.cpp @@ -0,0 +1,105 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Chunpeng Zhang chunpeng@multicorewareinc.com +// +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include + +/////////////////////////////////////////////////////////////////////////////// +/// ColumnSum + +#ifdef HAVE_OPENCL + +//////////////////////////////////////////////////////////////////////// +// ColumnSum + +PARAM_TEST_CASE(ColumnSum, cv::Size, bool ) +{ + cv::Size size; + cv::Mat src; + bool useRoi; + + virtual void SetUp() + { + size = GET_PARAM(0); + useRoi = GET_PARAM(1); + } +}; + +TEST_P(ColumnSum, Accuracy) +{ + cv::Mat src = randomMat(size, CV_32FC1); + //cv::Mat src(size,CV_32FC1); + + //cv::ocl::oclMat d_dst = ::createMat(size,src.type(),useRoi); + cv::ocl::oclMat d_dst = loadMat(src,useRoi); + + cv::ocl::columnSum(loadMat(src,useRoi),d_dst); + + cv::Mat dst(d_dst); + + for (int j = 0; j < src.cols; ++j) + { + float gold = src.at(0, j); + float res = dst.at(0, j); + ASSERT_NEAR(res, gold, 1e-5); + } + + for (int i = 1; i < src.rows; ++i) + { + for (int j = 0; j < src.cols; ++j) + { + float gold = src.at(i, j) += src.at(i - 1, j); + float res = dst.at(i, j); + ASSERT_NEAR(res, gold, 1e-5); + } + } +} + +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ColumnSum, testing::Combine( + DIFFERENT_SIZES,testing::Values(Inverse(false),Inverse(true)))); + + +#endif diff --git a/modules/ocl/test/test_match_template.cpp b/modules/ocl/test/test_match_template.cpp new file mode 100644 index 000000000..683e18102 --- /dev/null +++ b/modules/ocl/test/test_match_template.cpp @@ -0,0 +1,166 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + + +#include "precomp.hpp" +#define PERF_TEST 0 + +//////////////////////////////////////////////////////////////////////////////// +// MatchTemplate +#define ALL_TEMPLATE_METHODS testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR), TemplateMethod(cv::TM_CCOEFF), TemplateMethod(cv::TM_SQDIFF_NORMED), TemplateMethod(cv::TM_CCORR_NORMED), TemplateMethod(cv::TM_CCOEFF_NORMED)) + +IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size); + +const char* TEMPLATE_METHOD_NAMES[6] = {"TM_SQDIFF", "TM_SQDIFF_NORMED", "TM_CCORR", "TM_CCORR_NORMED", "TM_CCOEFF", "TM_CCOEFF_NORMED"}; + +PARAM_TEST_CASE(MatchTemplate8U, cv::Size, TemplateSize, Channels, TemplateMethod) +{ + cv::Size size; + cv::Size templ_size; + int cn; + int method; + + virtual void SetUp() + { + size = GET_PARAM(0); + templ_size = GET_PARAM(1); + cn = GET_PARAM(2); + method = GET_PARAM(3); + } +}; + +TEST_P(MatchTemplate8U, Accuracy) +{ + + std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl; + std::cout << "Image Size: (" << size.width << ", " << size.height << ")"<< std::endl; + std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl; + std::cout << "Channels: " << cn << std::endl; + + cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn)); + cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn)); + + cv::ocl::oclMat dst, ocl_image(image), ocl_templ(templ); + cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method); + + cv::Mat dst_gold; + cv::matchTemplate(image, templ, dst_gold, method); + + char sss [100] = ""; + + cv::Mat mat_dst; + dst.download(mat_dst); + + + EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss); + +#if PERF_TEST + { + P_TEST_FULL({}, {cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);}, {}); + P_TEST_FULL({}, {cv::matchTemplate(image, templ, dst_gold, method);}, {}); + } +#endif // PERF_TEST +} + +PARAM_TEST_CASE(MatchTemplate32F, cv::Size, TemplateSize, Channels, TemplateMethod) +{ + cv::Size size; + cv::Size templ_size; + int cn; + int method; + + virtual void SetUp() + { + size = GET_PARAM(0); + templ_size = GET_PARAM(1); + cn = GET_PARAM(2); + method = GET_PARAM(3); + } +}; + +TEST_P(MatchTemplate32F, Accuracy) +{ + cv::Mat image = randomMat(size, CV_MAKETYPE(CV_32F, cn)); + cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn)); + + cv::ocl::oclMat dst, ocl_image(image), ocl_templ(templ); + cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method); + + cv::Mat dst_gold; + cv::matchTemplate(image, templ, dst_gold, method); + + char sss [100] = ""; + + cv::Mat mat_dst; + dst.download(mat_dst); + + EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss); + +#if PERF_TEST + { + std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl; + std::cout << "Image Size: (" << size.width << ", " << size.height << ")"<< std::endl; + std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl; + std::cout << "Channels: " << cn << std::endl; + P_TEST_FULL({}, {cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);}, {}); + P_TEST_FULL({}, {cv::matchTemplate(image, templ, dst_gold, method);}, {}); + } +#endif // PERF_TEST +} + +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U, + testing::Combine( + DIFFERENT_SIZES, + testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/), + testing::Values(Channels(1), Channels(4)), + ALL_TEMPLATE_METHODS + ) +); + +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine( + DIFFERENT_SIZES, + testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/), + testing::Values(Channels(1), Channels(4)), + testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR)))); + diff --git a/modules/ocl/test/test_pyrdown.cpp b/modules/ocl/test/test_pyrdown.cpp new file mode 100644 index 000000000..a3fd149d4 --- /dev/null +++ b/modules/ocl/test/test_pyrdown.cpp @@ -0,0 +1,295 @@ +/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Dachuan Zhao, dachuan@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +//#define PRINT_CPU_TIME 1000 +//#define PRINT_TIME + + +#include "precomp.hpp" +#include + +#ifdef HAVE_OPENCL + +using namespace cv; +using namespace cv::ocl; +using namespace cvtest; +using namespace testing; +using namespace std; + +PARAM_TEST_CASE(PyrDown, MatType, bool) +{ + int type; + cv::Scalar val; + + //src mat + cv::Mat mat1; + cv::Mat mat2; + cv::Mat mask; + cv::Mat dst; + cv::Mat dst1; //bak, for two outputs + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int src2x; + int src2y; + int dstx; + int dsty; + int maskx; + int masky; + + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat mat2_roi; + cv::Mat mask_roi; + cv::Mat dst_roi; + cv::Mat dst1_roi; //bak + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + cv::ocl::oclMat gdst1_whole; //bak + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gmat2; + cv::ocl::oclMat gdst; + cv::ocl::oclMat gdst1; //bak + cv::ocl::oclMat gmask; + + virtual void SetUp() + { + type = GET_PARAM(0); + + cv::RNG &rng = TS::ptr()->get_rng(); + + cv::Size size(MWIDTH, MHEIGHT); + + mat1 = randomMat(rng, size, type, 5, 16, false); + mat2 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + dst1 = randomMat(rng, size, type, 5, 16, false); + mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + + cv::threshold(mask, mask, 0.5, 255., CV_8UC1); + + val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); + + //int devnums = getDevice(oclinfo); + //CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + } + + void Cleanup() + { + mat1.release(); + mat2.release(); + mask.release(); + dst.release(); + dst1.release(); + mat1_roi.release(); + mat2_roi.release(); + mask_roi.release(); + dst_roi.release(); + dst1_roi.release(); + + gdst_whole.release(); + gdst1_whole.release(); + gmat1.release(); + gmat2.release(); + gdst.release(); + gdst1.release(); + gmask.release(); + } + + void random_roi() + { + cv::RNG &rng = TS::ptr()->get_rng(); + +#ifdef RANDOMROI + //randomize ROI + roicols = rng.uniform(1, mat1.cols); + roirows = rng.uniform(1, mat1.rows); + src1x = rng.uniform(0, mat1.cols - roicols); + src1y = rng.uniform(0, mat1.rows - roirows); + dstx = rng.uniform(0, dst.cols - roicols); + dsty = rng.uniform(0, dst.rows - roirows); +#else + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src1y = 0; + dstx = 0; + dsty = 0; +#endif + maskx = rng.uniform(0, mask.cols - roicols); + masky = rng.uniform(0, mask.rows - roirows); + src2x = rng.uniform(0, mat2.cols - roicols); + src2y = rng.uniform(0, mat2.rows - roirows); + mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); + mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows)); + mask_roi = mask(Rect(maskx, masky, roicols, roirows)); + dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); + dst1_roi = dst1(Rect(dstx, dsty, roicols, roirows)); + + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + + gdst1_whole = dst1; + gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + gmask = mask_roi; //end + } + +}; + +#define VARNAME(A) string(#A); + + +void PrePrint() +{ + //for(int i = 0; i < MHEIGHT; i++) + //{ + // printf("(%d) ", i); + // for(int k = 0; k < MWIDTH; k++) + // { + // printf("%d ", mat1_roi.data[i * MHEIGHT + k]); + // } + // printf("\n"); + //} +} + +void PostPrint() +{ + //dst_roi.convertTo(dst_roi,CV_32S); + //cpu_dst.convertTo(cpu_dst,CV_32S); + //dst_roi -= cpu_dst; + //cpu_dst -= dst_roi; + //for(int i = 0; i < MHEIGHT / 2; i++) + //{ + // printf("(%d) ", i); + // for(int k = 0; k < MWIDTH / 2; k++) + // { + // if(gmat1.depth() == 0) + // { + // if(gmat1.channels() == 1) + // { + // printf("%d ", dst_roi.data[i * MHEIGHT / 2 + k]); + // } + // else + // { + // printf("%d ", ((unsigned*)dst_roi.data)[i * MHEIGHT / 2 + k]); + // } + // } + // else if(gmat1.depth() == 5) + // { + // printf("%.6f ", ((float*)dst_roi.data)[i * MHEIGHT / 2 + k]); + // } + // } + // printf("\n"); + //} + //for(int i = 0; i < MHEIGHT / 2; i++) + //{ + // printf("(%d) ", i); + // for(int k = 0; k < MWIDTH / 2; k++) + // { + // if(gmat1.depth() == 0) + // { + // if(gmat1.channels() == 1) + // { + // printf("%d ", cpu_dst.data[i * MHEIGHT / 2 + k]); + // } + // else + // { + // printf("%d ", ((unsigned*)cpu_dst.data)[i * MHEIGHT / 2 + k]); + // } + // } + // else if(gmat1.depth() == 5) + // { + // printf("%.6f ", ((float*)cpu_dst.data)[i * MHEIGHT / 2 + k]); + // } + // } + // printf("\n"); + //} +} + +////////////////////////////////PyrDown///////////////////////////////////////////////// +//struct PyrDown : ArithmTestBase {}; + +TEST_P(PyrDown, Mat) +{ + for(int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + cv::pyrDown(mat1_roi, dst_roi); + cv::ocl::pyrDown(gmat1, gdst); + + cv::Mat cpu_dst; + gdst.download(cpu_dst); + char s[1024]; + sprintf(s, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y); + + EXPECT_MAT_NEAR(dst_roi, cpu_dst, dst_roi.depth() == CV_32F ? 1e-5f : 1.0f, s); + + Cleanup(); + } +} + + + + +//********test**************** +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrDown, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(false))); // Values(false) is the reserved parameter + + +#endif // HAVE_OPENCL diff --git a/modules/ocl/test/test_pyrup.cpp b/modules/ocl/test/test_pyrup.cpp new file mode 100644 index 000000000..eac7fe1e9 --- /dev/null +++ b/modules/ocl/test/test_pyrup.cpp @@ -0,0 +1,97 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Zhang Chunpeng chunpeng@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include "opencv2/core/core.hpp" + +#ifdef HAVE_OPENCL + + + + + + + + + + + +PARAM_TEST_CASE(PyrUp,cv::Size,int) +{ + cv::Size size; + int type; + + virtual void SetUp() + { + size = GET_PARAM(0); + type = GET_PARAM(1); + } +}; + +TEST_P(PyrUp,Accuracy) +{ + cv::Mat src = randomMat(size,type); + + + cv::Mat dst_gold; + cv::pyrUp(src,dst_gold); + + cv::ocl::oclMat dst; + cv::ocl::oclMat srcMat(src); + cv::ocl::pyrUp(srcMat,dst); + char s[100]={0}; + + EXPECT_MAT_NEAR(dst_gold, dst, (src.depth() == CV_32F ? 1e-4f : 1.0),s); + +} + +#if 1 +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, testing::Combine( + testing::Values(cv::Size(32, 32)), + testing::Values(MatType(CV_8UC1),MatType(CV_16UC1),MatType(CV_32FC1),MatType(CV_8UC4), + MatType(CV_16UC4),MatType(CV_32FC4)))); +#endif + +#endif // HAVE_OPENCL \ No newline at end of file From 7d97e8112aa78045525d3fde55839a5bb8a05436 Mon Sep 17 00:00:00 2001 From: yao Date: Mon, 6 Aug 2012 15:51:27 +0800 Subject: [PATCH 02/35] fix bugs in tests, set device for each test case Signed-off-by: yao --- modules/ocl/test/test_blend.cpp | 5 +++-- modules/ocl/test/test_columnsum.cpp | 3 +++ modules/ocl/test/test_match_template.cpp | 6 ++++++ modules/ocl/test/test_pyrdown.cpp | 4 ++-- modules/ocl/test/test_pyrup.cpp | 12 +++--------- 5 files changed, 17 insertions(+), 13 deletions(-) diff --git a/modules/ocl/test/test_blend.cpp b/modules/ocl/test/test_blend.cpp index f1649bfe1..a0391b1bb 100644 --- a/modules/ocl/test/test_blend.cpp +++ b/modules/ocl/test/test_blend.cpp @@ -33,7 +33,7 @@ void blendLinearGold(const cv::Mat& img1, const cv::Mat& img2, const cv::Mat& we PARAM_TEST_CASE(Blend, cv::Size, MatType/*, UseRoi*/) { - //cv::gpu::DeviceInfo devInfo; + std::vector oclinfo; cv::Size size; int type; bool useRoi; @@ -45,7 +45,8 @@ PARAM_TEST_CASE(Blend, cv::Size, MatType/*, UseRoi*/) type = GET_PARAM(1); /*useRoi = GET_PARAM(3);*/ - //cv::gpu::setDevice(devInfo.deviceID()); + int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE); + CV_Assert(devnums > 0); } }; diff --git a/modules/ocl/test/test_columnsum.cpp b/modules/ocl/test/test_columnsum.cpp index fe73dca76..94e109d20 100644 --- a/modules/ocl/test/test_columnsum.cpp +++ b/modules/ocl/test/test_columnsum.cpp @@ -60,11 +60,14 @@ PARAM_TEST_CASE(ColumnSum, cv::Size, bool ) cv::Size size; cv::Mat src; bool useRoi; + std::vector oclinfo; virtual void SetUp() { size = GET_PARAM(0); useRoi = GET_PARAM(1); + int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE); + CV_Assert(devnums > 0); } }; diff --git a/modules/ocl/test/test_match_template.cpp b/modules/ocl/test/test_match_template.cpp index 683e18102..7d599a615 100644 --- a/modules/ocl/test/test_match_template.cpp +++ b/modules/ocl/test/test_match_template.cpp @@ -60,6 +60,7 @@ PARAM_TEST_CASE(MatchTemplate8U, cv::Size, TemplateSize, Channels, TemplateMetho cv::Size templ_size; int cn; int method; + std::vector oclinfo; virtual void SetUp() { @@ -67,6 +68,8 @@ PARAM_TEST_CASE(MatchTemplate8U, cv::Size, TemplateSize, Channels, TemplateMetho templ_size = GET_PARAM(1); cn = GET_PARAM(2); method = GET_PARAM(3); + int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE); + CV_Assert(devnums > 0); } }; @@ -109,6 +112,7 @@ PARAM_TEST_CASE(MatchTemplate32F, cv::Size, TemplateSize, Channels, TemplateMeth cv::Size templ_size; int cn; int method; + std::vector oclinfo; virtual void SetUp() { @@ -116,6 +120,8 @@ PARAM_TEST_CASE(MatchTemplate32F, cv::Size, TemplateSize, Channels, TemplateMeth templ_size = GET_PARAM(1); cn = GET_PARAM(2); method = GET_PARAM(3); + int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE); + CV_Assert(devnums > 0); } }; diff --git a/modules/ocl/test/test_pyrdown.cpp b/modules/ocl/test/test_pyrdown.cpp index a3fd149d4..f2270b4a8 100644 --- a/modules/ocl/test/test_pyrdown.cpp +++ b/modules/ocl/test/test_pyrdown.cpp @@ -119,8 +119,8 @@ PARAM_TEST_CASE(PyrDown, MatType, bool) val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); //if you want to use undefault device, set it here //setDevice(oclinfo[0]); } diff --git a/modules/ocl/test/test_pyrup.cpp b/modules/ocl/test/test_pyrup.cpp index eac7fe1e9..c6c5b9c10 100644 --- a/modules/ocl/test/test_pyrup.cpp +++ b/modules/ocl/test/test_pyrup.cpp @@ -49,22 +49,16 @@ #ifdef HAVE_OPENCL - - - - - - - - - PARAM_TEST_CASE(PyrUp,cv::Size,int) { cv::Size size; int type; + std::vector oclinfo; virtual void SetUp() { + int devnums = cv::ocl::getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE); + CV_Assert(devnums > 0); size = GET_PARAM(0); type = GET_PARAM(1); } From 20131189717b75870908543ddea14fe2aaa3db06 Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Mon, 6 Aug 2012 16:35:35 +0400 Subject: [PATCH 03/35] new/improved Python samples by Alexander Mordvintsev --- samples/python2/common.py | 412 +++++++++++++------------- samples/python2/feature_homography.py | 256 ++++++---------- samples/python2/plane_ar.py | 103 +++++++ samples/python2/plane_tracker.py | 171 +++++++++++ 4 files changed, 574 insertions(+), 368 deletions(-) create mode 100755 samples/python2/plane_ar.py create mode 100755 samples/python2/plane_tracker.py diff --git a/samples/python2/common.py b/samples/python2/common.py index 0f332b6d0..89f8b77df 100644 --- a/samples/python2/common.py +++ b/samples/python2/common.py @@ -1,200 +1,212 @@ -import numpy as np -import cv2 -import os -from contextlib import contextmanager -import itertools as it - -image_extensions = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.pbm', '.pgm', '.ppm'] - -def splitfn(fn): - path, fn = os.path.split(fn) - name, ext = os.path.splitext(fn) - return path, name, ext - -def anorm2(a): - return (a*a).sum(-1) -def anorm(a): - return np.sqrt( anorm2(a) ) - -def homotrans(H, x, y): - xs = H[0, 0]*x + H[0, 1]*y + H[0, 2] - ys = H[1, 0]*x + H[1, 1]*y + H[1, 2] - s = H[2, 0]*x + H[2, 1]*y + H[2, 2] - return xs/s, ys/s - -def to_rect(a): - a = np.ravel(a) - if len(a) == 2: - a = (0, 0, a[0], a[1]) - return np.array(a, np.float64).reshape(2, 2) - -def rect2rect_mtx(src, dst): - src, dst = to_rect(src), to_rect(dst) - cx, cy = (dst[1] - dst[0]) / (src[1] - src[0]) - tx, ty = dst[0] - src[0] * (cx, cy) - M = np.float64([[ cx, 0, tx], - [ 0, cy, ty], - [ 0, 0, 1]]) - return M - - -def lookat(eye, target, up = (0, 0, 1)): - fwd = np.asarray(target, np.float64) - eye - fwd /= anorm(fwd) - right = np.cross(fwd, up) - right /= anorm(right) - down = np.cross(fwd, right) - R = np.float64([right, down, fwd]) - tvec = -np.dot(R, eye) - return R, tvec - -def mtx2rvec(R): - w, u, vt = cv2.SVDecomp(R - np.eye(3)) - p = vt[0] + u[:,0]*w[0] # same as np.dot(R, vt[0]) - c = np.dot(vt[0], p) - s = np.dot(vt[1], p) - axis = np.cross(vt[0], vt[1]) - return axis * np.arctan2(s, c) - -def draw_str(dst, (x, y), s): - cv2.putText(dst, s, (x+1, y+1), cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 0), thickness = 2, lineType=cv2.CV_AA) - cv2.putText(dst, s, (x, y), cv2.FONT_HERSHEY_PLAIN, 1.0, (255, 255, 255), lineType=cv2.CV_AA) - -class Sketcher: - def __init__(self, windowname, dests, colors_func): - self.prev_pt = None - self.windowname = windowname - self.dests = dests - self.colors_func = colors_func - self.dirty = False - self.show() - cv2.setMouseCallback(self.windowname, self.on_mouse) - - def show(self): - cv2.imshow(self.windowname, self.dests[0]) - - def on_mouse(self, event, x, y, flags, param): - pt = (x, y) - if event == cv2.EVENT_LBUTTONDOWN: - self.prev_pt = pt - if self.prev_pt and flags & cv2.EVENT_FLAG_LBUTTON: - for dst, color in zip(self.dests, self.colors_func()): - cv2.line(dst, self.prev_pt, pt, color, 5) - self.dirty = True - self.prev_pt = pt - self.show() - else: - self.prev_pt = None - - -# palette data from matplotlib/_cm.py -_jet_data = {'red': ((0., 0, 0), (0.35, 0, 0), (0.66, 1, 1), (0.89,1, 1), - (1, 0.5, 0.5)), - 'green': ((0., 0, 0), (0.125,0, 0), (0.375,1, 1), (0.64,1, 1), - (0.91,0,0), (1, 0, 0)), - 'blue': ((0., 0.5, 0.5), (0.11, 1, 1), (0.34, 1, 1), (0.65,0, 0), - (1, 0, 0))} - -cmap_data = { 'jet' : _jet_data } - -def make_cmap(name, n=256): - data = cmap_data[name] - xs = np.linspace(0.0, 1.0, n) - channels = [] - eps = 1e-6 - for ch_name in ['blue', 'green', 'red']: - ch_data = data[ch_name] - xp, yp = [], [] - for x, y1, y2 in ch_data: - xp += [x, x+eps] - yp += [y1, y2] - ch = np.interp(xs, xp, yp) - channels.append(ch) - return np.uint8(np.array(channels).T*255) - -def nothing(*arg, **kw): - pass - -def clock(): - return cv2.getTickCount() / cv2.getTickFrequency() - -@contextmanager -def Timer(msg): - print msg, '...', - start = clock() - try: - yield - finally: - print "%.2f ms" % ((clock()-start)*1000) - -class StatValue: - def __init__(self, smooth_coef = 0.5): - self.value = None - self.smooth_coef = smooth_coef - def update(self, v): - if self.value is None: - self.value = v - else: - c = self.smooth_coef - self.value = c * self.value + (1.0-c) * v - -class RectSelector: - def __init__(self, win, callback): - self.win = win - self.callback = callback - cv2.setMouseCallback(win, self.onmouse) - self.drag_start = None - self.drag_rect = None - def onmouse(self, event, x, y, flags, param): - x, y = np.int16([x, y]) # BUG - if event == cv2.EVENT_LBUTTONDOWN: - self.drag_start = (x, y) - if self.drag_start: - if flags & cv2.EVENT_FLAG_LBUTTON: - xo, yo = self.drag_start - x0, y0 = np.minimum([xo, yo], [x, y]) - x1, y1 = np.maximum([xo, yo], [x, y]) - self.drag_rect = None - if x1-x0 > 0 and y1-y0 > 0: - self.drag_rect = (x0, y0, x1, y1) - else: - rect = self.drag_rect - self.drag_start = None - self.drag_rect = None - if rect: - self.callback(rect) - def draw(self, vis): - if not self.drag_rect: - return False - x0, y0, x1, y1 = self.drag_rect - cv2.rectangle(vis, (x0, y0), (x1, y1), (0, 255, 0), 2) - return True - @property - def dragging(self): - return self.drag_rect is not None - - -def grouper(n, iterable, fillvalue=None): - '''grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx''' - args = [iter(iterable)] * n - return it.izip_longest(fillvalue=fillvalue, *args) - -def mosaic(w, imgs): - '''Make a grid from images. - - w -- number of grid columns - imgs -- images (must have same size and format) - ''' - imgs = iter(imgs) - img0 = imgs.next() - pad = np.zeros_like(img0) - imgs = it.chain([img0], imgs) - rows = grouper(w, imgs, pad) - return np.vstack(map(np.hstack, rows)) - -def getsize(img): - h, w = img.shape[:2] - return w, h - -def mdot(*args): - return reduce(np.dot, args) +import numpy as np +import cv2 +import os +from contextlib import contextmanager +import itertools as it + +image_extensions = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.pbm', '.pgm', '.ppm'] + +class Bunch(object): + def __init__(self, **kw): + self.__dict__.update(kw) + def __str__(self): + return str(self.__dict__) + +def splitfn(fn): + path, fn = os.path.split(fn) + name, ext = os.path.splitext(fn) + return path, name, ext + +def anorm2(a): + return (a*a).sum(-1) +def anorm(a): + return np.sqrt( anorm2(a) ) + +def homotrans(H, x, y): + xs = H[0, 0]*x + H[0, 1]*y + H[0, 2] + ys = H[1, 0]*x + H[1, 1]*y + H[1, 2] + s = H[2, 0]*x + H[2, 1]*y + H[2, 2] + return xs/s, ys/s + +def to_rect(a): + a = np.ravel(a) + if len(a) == 2: + a = (0, 0, a[0], a[1]) + return np.array(a, np.float64).reshape(2, 2) + +def rect2rect_mtx(src, dst): + src, dst = to_rect(src), to_rect(dst) + cx, cy = (dst[1] - dst[0]) / (src[1] - src[0]) + tx, ty = dst[0] - src[0] * (cx, cy) + M = np.float64([[ cx, 0, tx], + [ 0, cy, ty], + [ 0, 0, 1]]) + return M + + +def lookat(eye, target, up = (0, 0, 1)): + fwd = np.asarray(target, np.float64) - eye + fwd /= anorm(fwd) + right = np.cross(fwd, up) + right /= anorm(right) + down = np.cross(fwd, right) + R = np.float64([right, down, fwd]) + tvec = -np.dot(R, eye) + return R, tvec + +def mtx2rvec(R): + w, u, vt = cv2.SVDecomp(R - np.eye(3)) + p = vt[0] + u[:,0]*w[0] # same as np.dot(R, vt[0]) + c = np.dot(vt[0], p) + s = np.dot(vt[1], p) + axis = np.cross(vt[0], vt[1]) + return axis * np.arctan2(s, c) + +def draw_str(dst, (x, y), s): + cv2.putText(dst, s, (x+1, y+1), cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 0), thickness = 2, lineType=cv2.CV_AA) + cv2.putText(dst, s, (x, y), cv2.FONT_HERSHEY_PLAIN, 1.0, (255, 255, 255), lineType=cv2.CV_AA) + +class Sketcher: + def __init__(self, windowname, dests, colors_func): + self.prev_pt = None + self.windowname = windowname + self.dests = dests + self.colors_func = colors_func + self.dirty = False + self.show() + cv2.setMouseCallback(self.windowname, self.on_mouse) + + def show(self): + cv2.imshow(self.windowname, self.dests[0]) + + def on_mouse(self, event, x, y, flags, param): + pt = (x, y) + if event == cv2.EVENT_LBUTTONDOWN: + self.prev_pt = pt + if self.prev_pt and flags & cv2.EVENT_FLAG_LBUTTON: + for dst, color in zip(self.dests, self.colors_func()): + cv2.line(dst, self.prev_pt, pt, color, 5) + self.dirty = True + self.prev_pt = pt + self.show() + else: + self.prev_pt = None + + +# palette data from matplotlib/_cm.py +_jet_data = {'red': ((0., 0, 0), (0.35, 0, 0), (0.66, 1, 1), (0.89,1, 1), + (1, 0.5, 0.5)), + 'green': ((0., 0, 0), (0.125,0, 0), (0.375,1, 1), (0.64,1, 1), + (0.91,0,0), (1, 0, 0)), + 'blue': ((0., 0.5, 0.5), (0.11, 1, 1), (0.34, 1, 1), (0.65,0, 0), + (1, 0, 0))} + +cmap_data = { 'jet' : _jet_data } + +def make_cmap(name, n=256): + data = cmap_data[name] + xs = np.linspace(0.0, 1.0, n) + channels = [] + eps = 1e-6 + for ch_name in ['blue', 'green', 'red']: + ch_data = data[ch_name] + xp, yp = [], [] + for x, y1, y2 in ch_data: + xp += [x, x+eps] + yp += [y1, y2] + ch = np.interp(xs, xp, yp) + channels.append(ch) + return np.uint8(np.array(channels).T*255) + +def nothing(*arg, **kw): + pass + +def clock(): + return cv2.getTickCount() / cv2.getTickFrequency() + +@contextmanager +def Timer(msg): + print msg, '...', + start = clock() + try: + yield + finally: + print "%.2f ms" % ((clock()-start)*1000) + +class StatValue: + def __init__(self, smooth_coef = 0.5): + self.value = None + self.smooth_coef = smooth_coef + def update(self, v): + if self.value is None: + self.value = v + else: + c = self.smooth_coef + self.value = c * self.value + (1.0-c) * v + +class RectSelector: + def __init__(self, win, callback): + self.win = win + self.callback = callback + cv2.setMouseCallback(win, self.onmouse) + self.drag_start = None + self.drag_rect = None + def onmouse(self, event, x, y, flags, param): + x, y = np.int16([x, y]) # BUG + if event == cv2.EVENT_LBUTTONDOWN: + self.drag_start = (x, y) + if self.drag_start: + if flags & cv2.EVENT_FLAG_LBUTTON: + xo, yo = self.drag_start + x0, y0 = np.minimum([xo, yo], [x, y]) + x1, y1 = np.maximum([xo, yo], [x, y]) + self.drag_rect = None + if x1-x0 > 0 and y1-y0 > 0: + self.drag_rect = (x0, y0, x1, y1) + else: + rect = self.drag_rect + self.drag_start = None + self.drag_rect = None + if rect: + self.callback(rect) + def draw(self, vis): + if not self.drag_rect: + return False + x0, y0, x1, y1 = self.drag_rect + cv2.rectangle(vis, (x0, y0), (x1, y1), (0, 255, 0), 2) + return True + @property + def dragging(self): + return self.drag_rect is not None + + +def grouper(n, iterable, fillvalue=None): + '''grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx''' + args = [iter(iterable)] * n + return it.izip_longest(fillvalue=fillvalue, *args) + +def mosaic(w, imgs): + '''Make a grid from images. + + w -- number of grid columns + imgs -- images (must have same size and format) + ''' + imgs = iter(imgs) + img0 = imgs.next() + pad = np.zeros_like(img0) + imgs = it.chain([img0], imgs) + rows = grouper(w, imgs, pad) + return np.vstack(map(np.hstack, rows)) + +def getsize(img): + h, w = img.shape[:2] + return w, h + +def mdot(*args): + return reduce(np.dot, args) + +def draw_keypoints(vis, keypoints, color = (0, 255, 255)): + for kp in keypoints: + x, y = kp.pt + cv2.circle(vis, (int(x), int(y)), 2, color) + diff --git a/samples/python2/feature_homography.py b/samples/python2/feature_homography.py index d553deb97..1eae58aa0 100644 --- a/samples/python2/feature_homography.py +++ b/samples/python2/feature_homography.py @@ -1,168 +1,88 @@ -''' -Feature homography -================== - -Example of using features2d framework for interactive video homography matching. -ORB features and FLANN matcher are used. - -Inspired by http://www.youtube.com/watch?v=-ZNYoL8rzPY - -Usage ------ -feature_homography.py [