Add blend, columnsum, pyrUp/down, matchTemplate for ocl module
This commit is contained in:
98
modules/ocl/src/blend.cpp
Normal file
98
modules/ocl/src/blend.cpp
Normal file
@@ -0,0 +1,98 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Nathan, liujun@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other oclMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include <iomanip>
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::ocl;
|
||||
using namespace std;
|
||||
|
||||
#if !defined (HAVE_OPENCL)
|
||||
void cv::ocl::blendLinear(const oclMat& img1, const oclMat& img2, const oclMat& weights1, const oclMat& weights2,
|
||||
oclMat& result){throw_nogpu();}
|
||||
#else
|
||||
namespace cv
|
||||
{
|
||||
namespace ocl
|
||||
{
|
||||
////////////////////////////////////OpenCL kernel strings//////////////////////////
|
||||
extern const char *blend_linear;
|
||||
}
|
||||
}
|
||||
|
||||
void cv::ocl::blendLinear(const oclMat& img1, const oclMat& img2, const oclMat& weights1, const oclMat& weights2,
|
||||
oclMat& result)
|
||||
{
|
||||
cv::ocl::Context *ctx = img1.clCxt;
|
||||
assert(ctx == img2.clCxt && ctx == weights1.clCxt && ctx == weights2.clCxt);
|
||||
int channels = img1.channels();
|
||||
int depth = img1.depth();
|
||||
int rows = img1.rows;
|
||||
int cols = img1.cols;
|
||||
int istep = img1.step;
|
||||
int wstep = weights1.step;
|
||||
size_t globalSize[] = {cols * channels, rows, 1};
|
||||
size_t localSize[] = {16, 16, 1};
|
||||
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
if(globalSize[0]!=0)
|
||||
{
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&img1.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&img2.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&rows ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&istep ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&wstep ));
|
||||
std::string kernelName = "BlendLinear";
|
||||
|
||||
openCLExecuteKernel(ctx, &blend_linear, kernelName, globalSize, localSize, args, channels, depth);
|
||||
}
|
||||
}
|
||||
#endif
|
91
modules/ocl/src/columnsum.cpp
Normal file
91
modules/ocl/src/columnsum.cpp
Normal file
@@ -0,0 +1,91 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Chunpeng Zhang, chunpeng@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other oclMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include <iomanip>
|
||||
#include "precomp.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::ocl;
|
||||
using namespace std;
|
||||
|
||||
|
||||
#if !defined(HAVE_OPENCL)
|
||||
|
||||
void cv::ocl::columnSum(const oclMat& src,oclMat& dst){ throw_nogpu(); }
|
||||
|
||||
#else /*!HAVE_OPENCL */
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace ocl
|
||||
{
|
||||
extern const char* imgproc_columnsum;
|
||||
}
|
||||
}
|
||||
|
||||
void cv::ocl::columnSum(const oclMat& src,oclMat& dst)
|
||||
{
|
||||
CV_Assert(src.type() == CV_32FC1 && dst.type() == CV_32FC1 && src.size() == dst.size());
|
||||
|
||||
Context *clCxt = src.clCxt;
|
||||
|
||||
const std::string kernelName = "columnSum";
|
||||
|
||||
std::vector< pair<size_t, const void *> > args;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step));
|
||||
|
||||
size_t globalThreads[3] = {dst.cols, dst.rows, 1};
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
|
||||
openCLExecuteKernel(clCxt, &imgproc_columnsum, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
|
||||
|
||||
}
|
||||
#endif
|
196
modules/ocl/src/kernels/blend_linear.cl
Normal file
196
modules/ocl/src/kernels/blend_linear.cl
Normal file
@@ -0,0 +1,196 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, MulticoreWare Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Liu Liujun, liujun@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other GpuMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
__kernel void BlendLinear_C1_D0(
|
||||
__global uchar *dst,
|
||||
__global uchar *img1,
|
||||
__global uchar *img2,
|
||||
__global float *weight1,
|
||||
__global float *weight2,
|
||||
int rows,
|
||||
int cols,
|
||||
int istep,
|
||||
int wstep
|
||||
)
|
||||
{
|
||||
int idx = get_global_id(0);
|
||||
int idy = get_global_id(1);
|
||||
if (idx < cols && idy < rows)
|
||||
{
|
||||
int pos = idy * istep + idx;
|
||||
int wpos = idy * (wstep /sizeof(float)) + idx;
|
||||
float w1 = weight1[wpos];
|
||||
float w2 = weight2[wpos];
|
||||
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void BlendLinear_C3_D0(
|
||||
__global uchar *dst,
|
||||
__global uchar *img1,
|
||||
__global uchar *img2,
|
||||
__global float *weight1,
|
||||
__global float *weight2,
|
||||
int rows,
|
||||
int cols,
|
||||
int istep,
|
||||
int wstep
|
||||
)
|
||||
{
|
||||
int idx = get_global_id(0);
|
||||
int idy = get_global_id(1);
|
||||
int x = idx / 3;
|
||||
int y = idy;
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int pos = idy * istep + idx;
|
||||
int wpos = idy * (wstep /sizeof(float)) + x;
|
||||
float w1 = weight1[wpos];
|
||||
float w2 = weight2[wpos];
|
||||
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void BlendLinear_C4_D0(
|
||||
__global uchar *dst,
|
||||
__global uchar *img1,
|
||||
__global uchar *img2,
|
||||
__global float *weight1,
|
||||
__global float *weight2,
|
||||
int rows,
|
||||
int cols,
|
||||
int istep,
|
||||
int wstep
|
||||
)
|
||||
{
|
||||
int idx = get_global_id(0);
|
||||
int idy = get_global_id(1);
|
||||
int x = idx / 4;
|
||||
int y = idy;
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int pos = idy * istep + idx;
|
||||
int wpos = idy * (wstep /sizeof(float)) + x;
|
||||
float w1 = weight1[wpos];
|
||||
float w2 = weight2[wpos];
|
||||
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void BlendLinear_C1_D5(
|
||||
__global float *dst,
|
||||
__global float *img1,
|
||||
__global float *img2,
|
||||
__global float *weight1,
|
||||
__global float *weight2,
|
||||
int rows,
|
||||
int cols,
|
||||
int istep,
|
||||
int wstep
|
||||
)
|
||||
{
|
||||
int idx = get_global_id(0);
|
||||
int idy = get_global_id(1);
|
||||
if (idx < cols && idy < rows)
|
||||
{
|
||||
int pos = idy * (istep / sizeof(float)) + idx;
|
||||
int wpos = idy * (wstep /sizeof(float)) + idx;
|
||||
float w1 = weight1[wpos];
|
||||
float w2 = weight2[wpos];
|
||||
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void BlendLinear_C3_D5(
|
||||
__global float *dst,
|
||||
__global float *img1,
|
||||
__global float *img2,
|
||||
__global float *weight1,
|
||||
__global float *weight2,
|
||||
int rows,
|
||||
int cols,
|
||||
int istep,
|
||||
int wstep
|
||||
)
|
||||
{
|
||||
int idx = get_global_id(0);
|
||||
int idy = get_global_id(1);
|
||||
int x = idx / 3;
|
||||
int y = idy;
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int pos = idy * (istep / sizeof(float)) + idx;
|
||||
int wpos = idy * (wstep /sizeof(float)) + x;
|
||||
float w1 = weight1[wpos];
|
||||
float w2 = weight2[wpos];
|
||||
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void BlendLinear_C4_D5(
|
||||
__global float *dst,
|
||||
__global float *img1,
|
||||
__global float *img2,
|
||||
__global float *weight1,
|
||||
__global float *weight2,
|
||||
int rows,
|
||||
int cols,
|
||||
int istep,
|
||||
int wstep
|
||||
)
|
||||
{
|
||||
int idx = get_global_id(0);
|
||||
int idy = get_global_id(1);
|
||||
int x = idx / 4;
|
||||
int y = idy;
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int pos = idy * (istep / sizeof(float)) + idx;
|
||||
int wpos = idy * (wstep /sizeof(float)) + x;
|
||||
float w1 = weight1[wpos];
|
||||
float w2 = weight2[wpos];
|
||||
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
|
||||
}
|
||||
}
|
80
modules/ocl/src/kernels/imgproc_columnsum.cl
Normal file
80
modules/ocl/src/kernels/imgproc_columnsum.cl
Normal file
@@ -0,0 +1,80 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Chunpeng Zhang chunpeng@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other oclMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||
#if defined (__ATI__)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
|
||||
#elif defined (__NVIDIA__)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
///////////////////////// columnSum ////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////
|
||||
/// CV_32FC1
|
||||
__kernel void columnSum_C1_D5(__global float* src,__global float* dst,int srcCols,int srcRows,int srcStep,int dstStep)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
|
||||
srcStep >>= 2;
|
||||
dstStep >>= 2;
|
||||
|
||||
if (x < srcCols)
|
||||
{
|
||||
int srcIdx = x ;
|
||||
int dstIdx = x ;
|
||||
|
||||
float sum = 0;
|
||||
|
||||
for (int y = 0; y < srcRows; ++y)
|
||||
{
|
||||
sum += src[srcIdx];
|
||||
dst[dstIdx] = sum;
|
||||
srcIdx += srcStep;
|
||||
dstIdx += dstStep;
|
||||
}
|
||||
}
|
||||
}
|
824
modules/ocl/src/kernels/match_template.cl
Normal file
824
modules/ocl/src/kernels/match_template.cl
Normal file
@@ -0,0 +1,824 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Peng Xiao, pengxiao@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other oclMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||
|
||||
#if defined (__ATI__)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
|
||||
#elif defined (__NVIDIA__)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
|
||||
#if !defined(USE_SQR_INTEGRAL) && (defined (__ATI__) || defined (__NVIDIA__))
|
||||
#define TYPE_IMAGE_SQSUM double
|
||||
#else
|
||||
#define TYPE_IMAGE_SQSUM ulong
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// utilities
|
||||
#define SQSUMS_PTR(ox, oy) mad24(gidy + oy, img_sqsums_step, gidx + img_sqsums_offset + ox)
|
||||
#define SUMS_PTR(ox, oy) mad24(gidy + oy, img_sums_step, gidx + img_sums_offset + ox)
|
||||
// normAcc* are accurate normalization routines which make GPU matchTemplate
|
||||
// consistent with CPU one
|
||||
float normAcc(float num, float denum)
|
||||
{
|
||||
if(fabs(num) < denum)
|
||||
{
|
||||
return num / denum;
|
||||
}
|
||||
if(fabs(num) < denum * 1.125f)
|
||||
{
|
||||
return num > 0 ? 1 : -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
float normAcc_SQDIFF(float num, float denum)
|
||||
{
|
||||
if(fabs(num) < denum)
|
||||
{
|
||||
return num / denum;
|
||||
}
|
||||
if(fabs(num) < denum * 1.125f)
|
||||
{
|
||||
return num > 0 ? 1 : -1;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// normalize
|
||||
|
||||
__kernel
|
||||
void normalizeKernel_C1_D0
|
||||
(
|
||||
__global const TYPE_IMAGE_SQSUM * img_sqsums,
|
||||
__global float * res,
|
||||
ulong tpl_sqsum,
|
||||
int res_rows,
|
||||
int res_cols,
|
||||
int tpl_rows,
|
||||
int tpl_cols,
|
||||
int img_sqsums_offset,
|
||||
int img_sqsums_step,
|
||||
int res_offset,
|
||||
int res_step
|
||||
)
|
||||
{
|
||||
int gidx = get_global_id(0);
|
||||
int gidy = get_global_id(1);
|
||||
|
||||
res_step /= sizeof(*res);
|
||||
res_offset /= sizeof(*res);
|
||||
img_sqsums_step /= sizeof(*img_sqsums);
|
||||
img_sqsums_offset /= sizeof(*img_sqsums);
|
||||
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
||||
if(gidx < res_cols && gidy < res_rows)
|
||||
{
|
||||
float image_sqsum_ = (float)(
|
||||
(img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) -
|
||||
(img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)]));
|
||||
res[res_idx] = normAcc(res[res_idx], sqrt(image_sqsum_ * tpl_sqsum));
|
||||
}
|
||||
}
|
||||
|
||||
__kernel
|
||||
void matchTemplate_Prepared_SQDIFF_C1_D0
|
||||
(
|
||||
__global const TYPE_IMAGE_SQSUM * img_sqsums,
|
||||
__global float * res,
|
||||
ulong tpl_sqsum,
|
||||
int res_rows,
|
||||
int res_cols,
|
||||
int tpl_rows,
|
||||
int tpl_cols,
|
||||
int img_sqsums_offset,
|
||||
int img_sqsums_step,
|
||||
int res_offset,
|
||||
int res_step
|
||||
)
|
||||
{
|
||||
int gidx = get_global_id(0);
|
||||
int gidy = get_global_id(1);
|
||||
|
||||
res_step /= sizeof(*res);
|
||||
res_offset /= sizeof(*res);
|
||||
img_sqsums_step /= sizeof(*img_sqsums);
|
||||
img_sqsums_offset /= sizeof(*img_sqsums);
|
||||
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
||||
if(gidx < res_cols && gidy < res_rows)
|
||||
{
|
||||
float image_sqsum_ = (float)(
|
||||
(img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) -
|
||||
(img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)]));
|
||||
res[res_idx] = image_sqsum_ - 2.f * res[res_idx] + tpl_sqsum;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel
|
||||
void matchTemplate_Prepared_SQDIFF_NORMED_C1_D0
|
||||
(
|
||||
__global const TYPE_IMAGE_SQSUM * img_sqsums,
|
||||
__global float * res,
|
||||
ulong tpl_sqsum,
|
||||
int res_rows,
|
||||
int res_cols,
|
||||
int tpl_rows,
|
||||
int tpl_cols,
|
||||
int img_sqsums_offset,
|
||||
int img_sqsums_step,
|
||||
int res_offset,
|
||||
int res_step
|
||||
)
|
||||
{
|
||||
int gidx = get_global_id(0);
|
||||
int gidy = get_global_id(1);
|
||||
|
||||
res_step /= sizeof(*res);
|
||||
res_offset /= sizeof(*res);
|
||||
img_sqsums_step /= sizeof(*img_sqsums);
|
||||
img_sqsums_offset /= sizeof(*img_sqsums);
|
||||
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
||||
if(gidx < res_cols && gidy < res_rows)
|
||||
{
|
||||
float image_sqsum_ = (float)(
|
||||
(img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) -
|
||||
(img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)]));
|
||||
res[res_idx] = normAcc_SQDIFF(image_sqsum_ - 2.f * res[res_idx] + tpl_sqsum,
|
||||
sqrt(image_sqsum_ * tpl_sqsum));
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// SQDIFF
|
||||
__kernel
|
||||
void matchTemplate_Naive_SQDIFF_C1_D0
|
||||
(
|
||||
__global const uchar * img,
|
||||
__global const uchar * tpl,
|
||||
__global float * res,
|
||||
int img_rows,
|
||||
int img_cols,
|
||||
int tpl_rows,
|
||||
int tpl_cols,
|
||||
int res_rows,
|
||||
int res_cols,
|
||||
int img_offset,
|
||||
int tpl_offset,
|
||||
int res_offset,
|
||||
int img_step,
|
||||
int tpl_step,
|
||||
int res_step
|
||||
)
|
||||
{
|
||||
int gidx = get_global_id(0);
|
||||
int gidy = get_global_id(1);
|
||||
int i,j;
|
||||
int delta;
|
||||
int sum = 0;
|
||||
res_step /= sizeof(*res);
|
||||
res_offset /= sizeof(*res);
|
||||
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
||||
|
||||
if(gidx < res_cols && gidy < res_rows)
|
||||
{
|
||||
for(i = 0; i < tpl_rows; i ++)
|
||||
{
|
||||
// get specific rows of img data
|
||||
__global const uchar * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
|
||||
__global const uchar * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
|
||||
for(j = 0; j < tpl_cols; j ++)
|
||||
{
|
||||
delta = img_ptr[j] - tpl_ptr[j];
|
||||
sum = mad24(delta, delta, sum);
|
||||
}
|
||||
}
|
||||
res[res_idx] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel
|
||||
void matchTemplate_Naive_SQDIFF_C1_D5
|
||||
(
|
||||
__global const float * img,
|
||||
__global const float * tpl,
|
||||
__global float * res,
|
||||
int img_rows,
|
||||
int img_cols,
|
||||
int tpl_rows,
|
||||
int tpl_cols,
|
||||
int res_rows,
|
||||
int res_cols,
|
||||
int img_offset,
|
||||
int tpl_offset,
|
||||
int res_offset,
|
||||
int img_step,
|
||||
int tpl_step,
|
||||
int res_step
|
||||
)
|
||||
{
|
||||
int gidx = get_global_id(0);
|
||||
int gidy = get_global_id(1);
|
||||
int i,j;
|
||||
float delta;
|
||||
float sum = 0;
|
||||
img_step /= sizeof(*img);
|
||||
img_offset /= sizeof(*img);
|
||||
tpl_step /= sizeof(*tpl);
|
||||
tpl_offset /= sizeof(*tpl);
|
||||
res_step /= sizeof(*res);
|
||||
res_offset /= sizeof(*res);
|
||||
|
||||
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
||||
|
||||
if(gidx < res_cols && gidy < res_rows)
|
||||
{
|
||||
for(i = 0; i < tpl_rows; i ++)
|
||||
{
|
||||
// get specific rows of img data
|
||||
__global const float * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
|
||||
__global const float * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
|
||||
for(j = 0; j < tpl_cols; j ++)
|
||||
{
|
||||
delta = img_ptr[j] - tpl_ptr[j];
|
||||
sum = mad(delta, delta, sum);
|
||||
}
|
||||
}
|
||||
res[res_idx] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel
|
||||
void matchTemplate_Naive_SQDIFF_C4_D0
|
||||
(
|
||||
__global const uchar4 * img,
|
||||
__global const uchar4 * tpl,
|
||||
__global float * res,
|
||||
int img_rows,
|
||||
int img_cols,
|
||||
int tpl_rows,
|
||||
int tpl_cols,
|
||||
int res_rows,
|
||||
int res_cols,
|
||||
int img_offset,
|
||||
int tpl_offset,
|
||||
int res_offset,
|
||||
int img_step,
|
||||
int tpl_step,
|
||||
int res_step
|
||||
)
|
||||
{
|
||||
int gidx = get_global_id(0);
|
||||
int gidy = get_global_id(1);
|
||||
int i,j;
|
||||
int4 delta;
|
||||
int4 sum = (int4)(0, 0, 0, 0);
|
||||
img_step /= sizeof(*img);
|
||||
img_offset /= sizeof(*img);
|
||||
tpl_step /= sizeof(*tpl);
|
||||
tpl_offset /= sizeof(*tpl);
|
||||
res_step /= sizeof(*res);
|
||||
res_offset /= sizeof(*res);
|
||||
|
||||
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
||||
|
||||
if(gidx < res_cols && gidy < res_rows)
|
||||
{
|
||||
for(i = 0; i < tpl_rows; i ++)
|
||||
{
|
||||
// get specific rows of img data
|
||||
__global const uchar4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
|
||||
__global const uchar4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
|
||||
for(j = 0; j < tpl_cols; j ++)
|
||||
{
|
||||
//delta = convert_int4(img_ptr[j] - tpl_ptr[j]); // this alternative is incorrect
|
||||
delta.x = img_ptr[j].x - tpl_ptr[j].x;
|
||||
delta.y = img_ptr[j].y - tpl_ptr[j].y;
|
||||
delta.z = img_ptr[j].z - tpl_ptr[j].z;
|
||||
delta.w = img_ptr[j].w - tpl_ptr[j].w;
|
||||
sum = mad24(delta, delta, sum);
|
||||
}
|
||||
}
|
||||
res[res_idx] = sum.x + sum.y + sum.z + sum.w;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel
|
||||
void matchTemplate_Naive_SQDIFF_C4_D5
|
||||
(
|
||||
__global const float4 * img,
|
||||
__global const float4 * tpl,
|
||||
__global float * res,
|
||||
int img_rows,
|
||||
int img_cols,
|
||||
int tpl_rows,
|
||||
int tpl_cols,
|
||||
int res_rows,
|
||||
int res_cols,
|
||||
int img_offset,
|
||||
int tpl_offset,
|
||||
int res_offset,
|
||||
int img_step,
|
||||
int tpl_step,
|
||||
int res_step
|
||||
)
|
||||
{
|
||||
int gidx = get_global_id(0);
|
||||
int gidy = get_global_id(1);
|
||||
int i,j;
|
||||
float4 delta;
|
||||
float4 sum = (float4)(0, 0, 0, 0);
|
||||
img_step /= sizeof(*img);
|
||||
img_offset /= sizeof(*img);
|
||||
tpl_step /= sizeof(*tpl);
|
||||
tpl_offset /= sizeof(*tpl);
|
||||
res_step /= sizeof(*res);
|
||||
res_offset /= sizeof(*res);
|
||||
|
||||
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
||||
|
||||
if(gidx < res_cols && gidy < res_rows)
|
||||
{
|
||||
for(i = 0; i < tpl_rows; i ++)
|
||||
{
|
||||
// get specific rows of img data
|
||||
__global const float4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
|
||||
__global const float4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
|
||||
for(j = 0; j < tpl_cols; j ++)
|
||||
{
|
||||
//delta = convert_int4(img_ptr[j] - tpl_ptr[j]); // this alternative is incorrect
|
||||
delta.x = img_ptr[j].x - tpl_ptr[j].x;
|
||||
delta.y = img_ptr[j].y - tpl_ptr[j].y;
|
||||
delta.z = img_ptr[j].z - tpl_ptr[j].z;
|
||||
delta.w = img_ptr[j].w - tpl_ptr[j].w;
|
||||
sum = mad(delta, delta, sum);
|
||||
}
|
||||
}
|
||||
res[res_idx] = sum.x + sum.y + sum.z + sum.w;
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// CCORR
|
||||
__kernel
|
||||
void matchTemplate_Naive_CCORR_C1_D0
|
||||
(
|
||||
__global const uchar * img,
|
||||
__global const uchar * tpl,
|
||||
__global float * res,
|
||||
int img_rows,
|
||||
int img_cols,
|
||||
int tpl_rows,
|
||||
int tpl_cols,
|
||||
int res_rows,
|
||||
int res_cols,
|
||||
int img_offset,
|
||||
int tpl_offset,
|
||||
int res_offset,
|
||||
int img_step,
|
||||
int tpl_step,
|
||||
int res_step
|
||||
)
|
||||
{
|
||||
int gidx = get_global_id(0);
|
||||
int gidy = get_global_id(1);
|
||||
int i,j;
|
||||
int sum = 0;
|
||||
res_step /= sizeof(*res);
|
||||
res_offset /= sizeof(*res);
|
||||
|
||||
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
||||
|
||||
if(gidx < res_cols && gidy < res_rows)
|
||||
{
|
||||
for(i = 0; i < tpl_rows; i ++)
|
||||
{
|
||||
// get specific rows of img data
|
||||
__global const uchar * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
|
||||
__global const uchar * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
|
||||
for(j = 0; j < tpl_cols; j ++)
|
||||
{
|
||||
sum = mad24(img_ptr[j], tpl_ptr[j], sum);
|
||||
}
|
||||
}
|
||||
res[res_idx] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel
|
||||
void matchTemplate_Naive_CCORR_C1_D5
|
||||
(
|
||||
__global const float * img,
|
||||
__global const float * tpl,
|
||||
__global float * res,
|
||||
int img_rows,
|
||||
int img_cols,
|
||||
int tpl_rows,
|
||||
int tpl_cols,
|
||||
int res_rows,
|
||||
int res_cols,
|
||||
int img_offset,
|
||||
int tpl_offset,
|
||||
int res_offset,
|
||||
int img_step,
|
||||
int tpl_step,
|
||||
int res_step
|
||||
)
|
||||
{
|
||||
int gidx = get_global_id(0);
|
||||
int gidy = get_global_id(1);
|
||||
int i,j;
|
||||
float sum = 0;
|
||||
img_step /= sizeof(*img);
|
||||
img_offset /= sizeof(*img);
|
||||
tpl_step /= sizeof(*tpl);
|
||||
tpl_offset /= sizeof(*tpl);
|
||||
res_step /= sizeof(*res);
|
||||
res_offset /= sizeof(*res);
|
||||
|
||||
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
||||
|
||||
if(gidx < res_cols && gidy < res_rows)
|
||||
{
|
||||
for(i = 0; i < tpl_rows; i ++)
|
||||
{
|
||||
// get specific rows of img data
|
||||
__global const float * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
|
||||
__global const float * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
|
||||
for(j = 0; j < tpl_cols; j ++)
|
||||
{
|
||||
sum = mad(img_ptr[j], tpl_ptr[j], sum);
|
||||
}
|
||||
}
|
||||
res[res_idx] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel
|
||||
void matchTemplate_Naive_CCORR_C4_D0
|
||||
(
|
||||
__global const uchar4 * img,
|
||||
__global const uchar4 * tpl,
|
||||
__global float * res,
|
||||
int img_rows,
|
||||
int img_cols,
|
||||
int tpl_rows,
|
||||
int tpl_cols,
|
||||
int res_rows,
|
||||
int res_cols,
|
||||
int img_offset,
|
||||
int tpl_offset,
|
||||
int res_offset,
|
||||
int img_step,
|
||||
int tpl_step,
|
||||
int res_step
|
||||
)
|
||||
{
|
||||
int gidx = get_global_id(0);
|
||||
int gidy = get_global_id(1);
|
||||
int i,j;
|
||||
int4 sum = (int4)(0, 0, 0, 0);
|
||||
img_step /= sizeof(*img);
|
||||
img_offset /= sizeof(*img);
|
||||
tpl_step /= sizeof(*tpl);
|
||||
tpl_offset /= sizeof(*tpl);
|
||||
res_step /= sizeof(*res);
|
||||
res_offset /= sizeof(*res);
|
||||
|
||||
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
||||
|
||||
if(gidx < res_cols && gidy < res_rows)
|
||||
{
|
||||
for(i = 0; i < tpl_rows; i ++)
|
||||
{
|
||||
// get specific rows of img data
|
||||
__global const uchar4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
|
||||
__global const uchar4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
|
||||
for(j = 0; j < tpl_cols; j ++)
|
||||
{
|
||||
sum = mad24(convert_int4(img_ptr[j]), convert_int4(tpl_ptr[j]), sum);
|
||||
}
|
||||
}
|
||||
res[res_idx] = sum.x + sum.y + sum.z + sum.w;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel
|
||||
void matchTemplate_Naive_CCORR_C4_D5
|
||||
(
|
||||
__global const float4 * img,
|
||||
__global const float4 * tpl,
|
||||
__global float * res,
|
||||
int img_rows,
|
||||
int img_cols,
|
||||
int tpl_rows,
|
||||
int tpl_cols,
|
||||
int res_rows,
|
||||
int res_cols,
|
||||
int img_offset,
|
||||
int tpl_offset,
|
||||
int res_offset,
|
||||
int img_step,
|
||||
int tpl_step,
|
||||
int res_step
|
||||
)
|
||||
{
|
||||
int gidx = get_global_id(0);
|
||||
int gidy = get_global_id(1);
|
||||
int i,j;
|
||||
float4 sum = (float4)(0, 0, 0, 0);
|
||||
img_step /= sizeof(*img);
|
||||
img_offset /= sizeof(*img);
|
||||
tpl_step /= sizeof(*tpl);
|
||||
tpl_offset /= sizeof(*tpl);
|
||||
res_step /= sizeof(*res);
|
||||
res_offset /= sizeof(*res);
|
||||
|
||||
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
||||
|
||||
if(gidx < res_cols && gidy < res_rows)
|
||||
{
|
||||
for(i = 0; i < tpl_rows; i ++)
|
||||
{
|
||||
// get specific rows of img data
|
||||
__global const float4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
|
||||
__global const float4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
|
||||
for(j = 0; j < tpl_cols; j ++)
|
||||
{
|
||||
sum = mad(convert_float4(img_ptr[j]), convert_float4(tpl_ptr[j]), sum);
|
||||
}
|
||||
}
|
||||
res[res_idx] = sum.x + sum.y + sum.z + sum.w;
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// CCOFF
|
||||
__kernel
|
||||
void matchTemplate_Prepared_CCOFF_C1_D0
|
||||
(
|
||||
__global float * res,
|
||||
int img_rows,
|
||||
int img_cols,
|
||||
int tpl_rows,
|
||||
int tpl_cols,
|
||||
int res_rows,
|
||||
int res_cols,
|
||||
int res_offset,
|
||||
int res_step,
|
||||
__global const uint * img_sums,
|
||||
int img_sums_offset,
|
||||
int img_sums_step,
|
||||
float tpl_sum
|
||||
)
|
||||
{
|
||||
int gidx = get_global_id(0);
|
||||
int gidy = get_global_id(1);
|
||||
|
||||
img_sums_offset /= sizeof(*img_sums);
|
||||
img_sums_step /= sizeof(*img_sums);
|
||||
res_step /= sizeof(*res);
|
||||
res_offset /= sizeof(*res);
|
||||
|
||||
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
||||
|
||||
if(gidx < res_cols && gidy < res_rows)
|
||||
{
|
||||
float sum = (float)(
|
||||
(img_sums[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums[SUMS_PTR(tpl_cols, 0)])
|
||||
- (img_sums[SUMS_PTR(0, tpl_rows)] - img_sums[SUMS_PTR(0, 0)]));
|
||||
res[res_idx] -= sum * tpl_sum;
|
||||
}
|
||||
}
|
||||
__kernel
|
||||
void matchTemplate_Prepared_CCOFF_C4_D0
|
||||
(
|
||||
__global float * res,
|
||||
int img_rows,
|
||||
int img_cols,
|
||||
int tpl_rows,
|
||||
int tpl_cols,
|
||||
int res_rows,
|
||||
int res_cols,
|
||||
int res_offset,
|
||||
int res_step,
|
||||
__global const uint * img_sums_c0,
|
||||
__global const uint * img_sums_c1,
|
||||
__global const uint * img_sums_c2,
|
||||
__global const uint * img_sums_c3,
|
||||
int img_sums_offset,
|
||||
int img_sums_step,
|
||||
float tpl_sum_c0,
|
||||
float tpl_sum_c1,
|
||||
float tpl_sum_c2,
|
||||
float tpl_sum_c3
|
||||
)
|
||||
{
|
||||
int gidx = get_global_id(0);
|
||||
int gidy = get_global_id(1);
|
||||
|
||||
img_sums_offset /= sizeof(*img_sums_c0);
|
||||
img_sums_step /= sizeof(*img_sums_c0);
|
||||
res_step /= sizeof(*res);
|
||||
res_offset /= sizeof(*res);
|
||||
|
||||
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
||||
|
||||
if(gidx < res_cols && gidy < res_rows)
|
||||
{
|
||||
float ccorr = res[res_idx];
|
||||
ccorr -= tpl_sum_c0*(float)(
|
||||
(img_sums_c0[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c0[SUMS_PTR(tpl_cols, 0)])
|
||||
- (img_sums_c0[SUMS_PTR(0, tpl_rows)] - img_sums_c0[SUMS_PTR(0, 0)]));
|
||||
ccorr -= tpl_sum_c1*(float)(
|
||||
(img_sums_c1[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c1[SUMS_PTR(tpl_cols, 0)])
|
||||
- (img_sums_c1[SUMS_PTR(0, tpl_rows)] - img_sums_c1[SUMS_PTR(0, 0)]));
|
||||
ccorr -= tpl_sum_c2*(float)(
|
||||
(img_sums_c2[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c2[SUMS_PTR(tpl_cols, 0)])
|
||||
- (img_sums_c2[SUMS_PTR(0, tpl_rows)] - img_sums_c2[SUMS_PTR(0, 0)]));
|
||||
ccorr -= tpl_sum_c3*(float)(
|
||||
(img_sums_c3[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c3[SUMS_PTR(tpl_cols, 0)])
|
||||
- (img_sums_c3[SUMS_PTR(0, tpl_rows)] - img_sums_c3[SUMS_PTR(0, 0)]));
|
||||
res[res_idx] = ccorr;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel
|
||||
void matchTemplate_Prepared_CCOFF_NORMED_C1_D0
|
||||
(
|
||||
__global float * res,
|
||||
int img_rows,
|
||||
int img_cols,
|
||||
int tpl_rows,
|
||||
int tpl_cols,
|
||||
int res_rows,
|
||||
int res_cols,
|
||||
int res_offset,
|
||||
int res_step,
|
||||
float weight,
|
||||
__global const uint * img_sums,
|
||||
int img_sums_offset,
|
||||
int img_sums_step,
|
||||
__global const TYPE_IMAGE_SQSUM * img_sqsums,
|
||||
int img_sqsums_offset,
|
||||
int img_sqsums_step,
|
||||
float tpl_sum,
|
||||
float tpl_sqsum
|
||||
)
|
||||
{
|
||||
int gidx = get_global_id(0);
|
||||
int gidy = get_global_id(1);
|
||||
|
||||
img_sqsums_step /= sizeof(*img_sqsums);
|
||||
img_sqsums_offset /= sizeof(*img_sqsums);
|
||||
img_sums_offset /= sizeof(*img_sums);
|
||||
img_sums_step /= sizeof(*img_sums);
|
||||
res_step /= sizeof(*res);
|
||||
res_offset /= sizeof(*res);
|
||||
|
||||
|
||||
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
||||
|
||||
if(gidx < res_cols && gidy < res_rows)
|
||||
{
|
||||
float image_sum_ = (float)(
|
||||
(img_sums[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums[SUMS_PTR(tpl_cols, 0)])
|
||||
- (img_sums[SUMS_PTR(0, tpl_rows)] - img_sums[SUMS_PTR(0, 0)]));
|
||||
|
||||
float image_sqsum_ = (float)(
|
||||
(img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) -
|
||||
(img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)]));
|
||||
res[res_idx] = normAcc(res[res_idx] - image_sum_ * tpl_sum,
|
||||
sqrt(tpl_sqsum * (image_sqsum_ - weight * image_sum_ * image_sum_)));
|
||||
}
|
||||
}
|
||||
__kernel
|
||||
void matchTemplate_Prepared_CCOFF_NORMED_C4_D0
|
||||
(
|
||||
__global float * res,
|
||||
int img_rows,
|
||||
int img_cols,
|
||||
int tpl_rows,
|
||||
int tpl_cols,
|
||||
int res_rows,
|
||||
int res_cols,
|
||||
int res_offset,
|
||||
int res_step,
|
||||
float weight,
|
||||
__global const uint * img_sums_c0,
|
||||
__global const uint * img_sums_c1,
|
||||
__global const uint * img_sums_c2,
|
||||
__global const uint * img_sums_c3,
|
||||
int img_sums_offset,
|
||||
int img_sums_step,
|
||||
__global const TYPE_IMAGE_SQSUM * img_sqsums_c0,
|
||||
__global const TYPE_IMAGE_SQSUM * img_sqsums_c1,
|
||||
__global const TYPE_IMAGE_SQSUM * img_sqsums_c2,
|
||||
__global const TYPE_IMAGE_SQSUM * img_sqsums_c3,
|
||||
int img_sqsums_offset,
|
||||
int img_sqsums_step,
|
||||
float tpl_sum_c0,
|
||||
float tpl_sum_c1,
|
||||
float tpl_sum_c2,
|
||||
float tpl_sum_c3,
|
||||
float tpl_sqsum
|
||||
)
|
||||
{
|
||||
int gidx = get_global_id(0);
|
||||
int gidy = get_global_id(1);
|
||||
|
||||
img_sqsums_step /= sizeof(*img_sqsums_c0);
|
||||
img_sqsums_offset /= sizeof(*img_sqsums_c0);
|
||||
img_sums_offset /= sizeof(*img_sums_c0);
|
||||
img_sums_step /= sizeof(*img_sums_c0);
|
||||
res_step /= sizeof(*res);
|
||||
res_offset /= sizeof(*res);
|
||||
|
||||
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
||||
|
||||
if(gidx < res_cols && gidy < res_rows)
|
||||
{
|
||||
float image_sum_c0 = (float)(
|
||||
(img_sums_c0[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c0[SUMS_PTR(tpl_cols, 0)])
|
||||
- (img_sums_c0[SUMS_PTR(0, tpl_rows)] - img_sums_c0[SUMS_PTR(0, 0)]));
|
||||
float image_sum_c1 = (float)(
|
||||
(img_sums_c1[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c1[SUMS_PTR(tpl_cols, 0)])
|
||||
- (img_sums_c1[SUMS_PTR(0, tpl_rows)] - img_sums_c1[SUMS_PTR(0, 0)]));
|
||||
float image_sum_c2 = (float)(
|
||||
(img_sums_c2[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c2[SUMS_PTR(tpl_cols, 0)])
|
||||
- (img_sums_c2[SUMS_PTR(0, tpl_rows)] - img_sums_c2[SUMS_PTR(0, 0)]));
|
||||
float image_sum_c3 = (float)(
|
||||
(img_sums_c3[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c3[SUMS_PTR(tpl_cols, 0)])
|
||||
- (img_sums_c3[SUMS_PTR(0, tpl_rows)] - img_sums_c3[SUMS_PTR(0, 0)]));
|
||||
|
||||
float image_sqsum_c0 = (float)(
|
||||
(img_sqsums_c0[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c0[SQSUMS_PTR(tpl_cols, 0)]) -
|
||||
(img_sqsums_c0[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c0[SQSUMS_PTR(0, 0)]));
|
||||
float image_sqsum_c1 = (float)(
|
||||
(img_sqsums_c1[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c1[SQSUMS_PTR(tpl_cols, 0)]) -
|
||||
(img_sqsums_c1[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c1[SQSUMS_PTR(0, 0)]));
|
||||
float image_sqsum_c2 = (float)(
|
||||
(img_sqsums_c2[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c2[SQSUMS_PTR(tpl_cols, 0)]) -
|
||||
(img_sqsums_c2[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c2[SQSUMS_PTR(0, 0)]));
|
||||
float image_sqsum_c3 = (float)(
|
||||
(img_sqsums_c3[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c3[SQSUMS_PTR(tpl_cols, 0)]) -
|
||||
(img_sqsums_c3[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c3[SQSUMS_PTR(0, 0)]));
|
||||
|
||||
float num = res[res_idx] -
|
||||
image_sum_c0 * tpl_sum_c0 -
|
||||
image_sum_c1 * tpl_sum_c1 -
|
||||
image_sum_c2 * tpl_sum_c2 -
|
||||
image_sum_c3 * tpl_sum_c3;
|
||||
float denum = sqrt( tpl_sqsum * (
|
||||
image_sqsum_c0 - weight * image_sum_c0 * image_sum_c0 +
|
||||
image_sqsum_c1 - weight * image_sum_c1 * image_sum_c1 +
|
||||
image_sqsum_c2 - weight * image_sum_c2 * image_sum_c2 +
|
||||
image_sqsum_c3 - weight * image_sum_c0 * image_sum_c3)
|
||||
);
|
||||
res[res_idx] = normAcc(num, denum);
|
||||
}
|
||||
}
|
||||
|
500
modules/ocl/src/kernels/pyr_down.cl
Normal file
500
modules/ocl/src/kernels/pyr_down.cl
Normal file
@@ -0,0 +1,500 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Dachuan Zhao, dachuan@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other oclMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||
|
||||
|
||||
uchar round_uchar_uchar(uchar v)
|
||||
{
|
||||
return v;
|
||||
}
|
||||
|
||||
uchar round_uchar_int(int v)
|
||||
{
|
||||
return (uchar)((uint)v <= 255 ? v : v > 0 ? 255 : 0);
|
||||
}
|
||||
|
||||
uchar round_uchar_float(float v)
|
||||
{
|
||||
if(v - convert_int_sat_rte(v) > 1e-6 || v - convert_int_sat_rte(v) < -1e-6)
|
||||
{
|
||||
if(((int)v + 1) - (v + 0.5f) < 1e-6 && ((int)v + 1) - (v + 0.5f) > -1e-6)
|
||||
{
|
||||
v = (int)v + 0.51f;
|
||||
}
|
||||
}
|
||||
int iv = convert_int_sat_rte(v);
|
||||
return round_uchar_int(iv);
|
||||
}
|
||||
|
||||
uchar4 round_uchar4_uchar4(uchar4 v)
|
||||
{
|
||||
return v;
|
||||
}
|
||||
|
||||
uchar4 round_uchar4_int4(int4 v)
|
||||
{
|
||||
uchar4 result;
|
||||
result.x = (uchar)(v.x <= 255 ? v.x : v.x > 0 ? 255 : 0);
|
||||
result.y = (uchar)(v.y <= 255 ? v.y : v.y > 0 ? 255 : 0);
|
||||
result.z = (uchar)(v.z <= 255 ? v.z : v.z > 0 ? 255 : 0);
|
||||
result.w = (uchar)(v.w <= 255 ? v.w : v.w > 0 ? 255 : 0);
|
||||
return result;
|
||||
}
|
||||
|
||||
uchar4 round_uchar4_float4(float4 v)
|
||||
{
|
||||
if(v.x - convert_int_sat_rte(v.x) > 1e-6 || v.x - convert_int_sat_rte(v.x) < -1e-6)
|
||||
{
|
||||
if(((int)(v.x) + 1) - (v.x + 0.5f) < 1e-6 && ((int)(v.x) + 1) - (v.x + 0.5f) > -1e-6)
|
||||
{
|
||||
v.x = (int)(v.x) + 0.51f;
|
||||
}
|
||||
}
|
||||
if(v.y - convert_int_sat_rte(v.y) > 1e-6 || v.y - convert_int_sat_rte(v.y) < -1e-6)
|
||||
{
|
||||
if(((int)(v.y) + 1) - (v.y + 0.5f) < 1e-6 && ((int)(v.y) + 1) - (v.y + 0.5f) > -1e-6)
|
||||
{
|
||||
v.y = (int)(v.y) + 0.51f;
|
||||
}
|
||||
}
|
||||
if(v.z - convert_int_sat_rte(v.z) > 1e-6 || v.z - convert_int_sat_rte(v.z) < -1e-6)
|
||||
{
|
||||
if(((int)(v.z) + 1) - (v.z + 0.5f) < 1e-6 && ((int)(v.z) + 1) - (v.z + 0.5f) > -1e-6)
|
||||
{
|
||||
v.z = (int)(v.z) + 0.51f;
|
||||
}
|
||||
}
|
||||
if(v.w - convert_int_sat_rte(v.w) > 1e-6 || v.w - convert_int_sat_rte(v.w) < -1e-6)
|
||||
{
|
||||
if(((int)(v.w) + 1) - (v.w + 0.5f) < 1e-6 && ((int)(v.w) + 1) - (v.w + 0.5f) > -1e-6)
|
||||
{
|
||||
v.w = (int)(v.w) + 0.51f;
|
||||
}
|
||||
}
|
||||
int4 iv = convert_int4_sat_rte(v);
|
||||
return round_uchar4_int4(iv);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
int idx_row_low(int y, int last_row)
|
||||
{
|
||||
if(y < 0)
|
||||
{
|
||||
y = -y;
|
||||
}
|
||||
return y % (last_row + 1);
|
||||
}
|
||||
|
||||
int idx_row_high(int y, int last_row)
|
||||
{
|
||||
int i;
|
||||
int j;
|
||||
if(last_row - y < 0)
|
||||
{
|
||||
i = (y - last_row);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = (last_row - y);
|
||||
}
|
||||
if(last_row - i < 0)
|
||||
{
|
||||
j = i - last_row;
|
||||
}
|
||||
else
|
||||
{
|
||||
j = last_row - i;
|
||||
}
|
||||
return j % (last_row + 1);
|
||||
}
|
||||
|
||||
int idx_row(int y, int last_row)
|
||||
{
|
||||
return idx_row_low(idx_row_high(y, last_row), last_row);
|
||||
}
|
||||
|
||||
int idx_col_low(int x, int last_col)
|
||||
{
|
||||
if(x < 0)
|
||||
{
|
||||
x = -x;
|
||||
}
|
||||
return x % (last_col + 1);
|
||||
}
|
||||
|
||||
int idx_col_high(int x, int last_col)
|
||||
{
|
||||
int i;
|
||||
int j;
|
||||
if(last_col - x < 0)
|
||||
{
|
||||
i = (x - last_col);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = (last_col - x);
|
||||
}
|
||||
if(last_col - i < 0)
|
||||
{
|
||||
j = i - last_col;
|
||||
}
|
||||
else
|
||||
{
|
||||
j = last_col - i;
|
||||
}
|
||||
return j % (last_col + 1);
|
||||
}
|
||||
|
||||
int idx_col(int x, int last_col)
|
||||
{
|
||||
return idx_col_low(idx_col_high(x, last_col), last_col);
|
||||
}
|
||||
|
||||
__kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcOffset, int srcRows, int srcCols, __global uchar *dst, int dstStep, int dstOffset, int dstCols)
|
||||
{
|
||||
const int x = get_group_id(0) * get_local_size(0) + get_local_id(0);
|
||||
const int y = get_group_id(1);
|
||||
|
||||
__local float smem[256 + 4];
|
||||
|
||||
float sum;
|
||||
|
||||
const int src_y = 2*y;
|
||||
const int last_row = srcRows - 1;
|
||||
const int last_col = srcCols - 1;
|
||||
|
||||
sum = 0;
|
||||
|
||||
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(x, last_col)]);
|
||||
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(x, last_col)]);
|
||||
sum = sum + 0.375f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(x, last_col)]);
|
||||
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(x, last_col)]);
|
||||
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(x, last_col)]);
|
||||
|
||||
smem[2 + get_local_id(0)] = sum;
|
||||
|
||||
if (get_local_id(0) < 2)
|
||||
{
|
||||
const int left_x = x - 2;
|
||||
|
||||
sum = 0;
|
||||
|
||||
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(left_x, last_col)]);
|
||||
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(left_x, last_col)]);
|
||||
sum = sum + 0.375f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(left_x, last_col)]);
|
||||
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(left_x, last_col)]);
|
||||
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(left_x, last_col)]);
|
||||
|
||||
smem[get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
if (get_local_id(0) > 253)
|
||||
{
|
||||
const int right_x = x + 2;
|
||||
|
||||
sum = 0;
|
||||
|
||||
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(right_x, last_col)]);
|
||||
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(right_x, last_col)]);
|
||||
sum = sum + 0.375f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(right_x, last_col)]);
|
||||
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(right_x, last_col)]);
|
||||
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(right_x, last_col)]);
|
||||
|
||||
smem[4 + get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (get_local_id(0) < 128)
|
||||
{
|
||||
const int tid2 = get_local_id(0) * 2;
|
||||
|
||||
sum = 0;
|
||||
|
||||
sum = sum + 0.0625f * smem[2 + tid2 - 2];
|
||||
sum = sum + 0.25f * smem[2 + tid2 - 1];
|
||||
sum = sum + 0.375f * smem[2 + tid2 ];
|
||||
sum = sum + 0.25f * smem[2 + tid2 + 1];
|
||||
sum = sum + 0.0625f * smem[2 + tid2 + 2];
|
||||
|
||||
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
|
||||
|
||||
if (dst_x < dstCols)
|
||||
dst[y * dstStep + dst_x] = round_uchar_float(sum);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcOffset, int srcRows, int srcCols, __global uchar4 *dst, int dstStep, int dstOffset, int dstCols)
|
||||
{
|
||||
const int x = get_group_id(0) * get_local_size(0) + get_local_id(0);
|
||||
const int y = get_group_id(1);
|
||||
|
||||
__local float4 smem[256 + 4];
|
||||
|
||||
float4 sum;
|
||||
|
||||
const int src_y = 2*y;
|
||||
const int last_row = srcRows - 1;
|
||||
const int last_col = srcCols - 1;
|
||||
|
||||
float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f);
|
||||
float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f);
|
||||
float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f);
|
||||
|
||||
sum = 0;
|
||||
|
||||
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(x, last_col)]));
|
||||
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(x, last_col)]));
|
||||
sum = sum + co1 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(x, last_col)]));
|
||||
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(x, last_col)]));
|
||||
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(x, last_col)]));
|
||||
|
||||
smem[2 + get_local_id(0)] = sum;
|
||||
|
||||
if (get_local_id(0) < 2)
|
||||
{
|
||||
const int left_x = x - 2;
|
||||
|
||||
sum = 0;
|
||||
|
||||
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)]));
|
||||
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)]));
|
||||
sum = sum + co1 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(left_x, last_col)]));
|
||||
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)]));
|
||||
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)]));
|
||||
|
||||
smem[get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
if (get_local_id(0) > 253)
|
||||
{
|
||||
const int right_x = x + 2;
|
||||
|
||||
sum = 0;
|
||||
|
||||
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)]));
|
||||
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)]));
|
||||
sum = sum + co1 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(right_x, last_col)]));
|
||||
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)]));
|
||||
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)]));
|
||||
|
||||
smem[4 + get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (get_local_id(0) < 128)
|
||||
{
|
||||
const int tid2 = get_local_id(0) * 2;
|
||||
|
||||
sum = 0;
|
||||
|
||||
sum = sum + co3 * smem[2 + tid2 - 2];
|
||||
sum = sum + co2 * smem[2 + tid2 - 1];
|
||||
sum = sum + co1 * smem[2 + tid2 ];
|
||||
sum = sum + co2 * smem[2 + tid2 + 1];
|
||||
sum = sum + co3 * smem[2 + tid2 + 2];
|
||||
|
||||
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
|
||||
|
||||
if (dst_x < dstCols)
|
||||
dst[y * dstStep / 4 + dst_x] = round_uchar4_float4(sum);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void pyrDown_C1_D5(__global float * srcData, int srcStep, int srcOffset, int srcRows, int srcCols, __global float *dst, int dstStep, int dstOffset, int dstCols)
|
||||
{
|
||||
const int x = get_group_id(0) * get_local_size(0) + get_local_id(0);
|
||||
const int y = get_group_id(1);
|
||||
|
||||
__local float smem[256 + 4];
|
||||
|
||||
float sum;
|
||||
|
||||
const int src_y = 2*y;
|
||||
const int last_row = srcRows - 1;
|
||||
const int last_col = srcCols - 1;
|
||||
|
||||
sum = 0;
|
||||
|
||||
sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(x, last_col)];
|
||||
sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(x, last_col)];
|
||||
sum = sum + 0.375f * ((__global float*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(x, last_col)];
|
||||
sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(x, last_col)];
|
||||
sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(x, last_col)];
|
||||
|
||||
smem[2 + get_local_id(0)] = sum;
|
||||
|
||||
if (get_local_id(0) < 2)
|
||||
{
|
||||
const int left_x = x - 2;
|
||||
|
||||
sum = 0;
|
||||
|
||||
sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(left_x, last_col)];
|
||||
sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(left_x, last_col)];
|
||||
sum = sum + 0.375f * ((__global float*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(left_x, last_col)];
|
||||
sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(left_x, last_col)];
|
||||
sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(left_x, last_col)];
|
||||
|
||||
smem[get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
if (get_local_id(0) > 253)
|
||||
{
|
||||
const int right_x = x + 2;
|
||||
|
||||
sum = 0;
|
||||
|
||||
sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(right_x, last_col)];
|
||||
sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(right_x, last_col)];
|
||||
sum = sum + 0.375f * ((__global float*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(right_x, last_col)];
|
||||
sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(right_x, last_col)];
|
||||
sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(right_x, last_col)];
|
||||
|
||||
smem[4 + get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (get_local_id(0) < 128)
|
||||
{
|
||||
const int tid2 = get_local_id(0) * 2;
|
||||
|
||||
sum = 0;
|
||||
|
||||
sum = sum + 0.0625f * smem[2 + tid2 - 2];
|
||||
sum = sum + 0.25f * smem[2 + tid2 - 1];
|
||||
sum = sum + 0.375f * smem[2 + tid2 ];
|
||||
sum = sum + 0.25f * smem[2 + tid2 + 1];
|
||||
sum = sum + 0.0625f * smem[2 + tid2 + 2];
|
||||
|
||||
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
|
||||
|
||||
if (dst_x < dstCols)
|
||||
dst[y * dstStep / 4 + dst_x] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcOffset, int srcRows, int srcCols, __global float4 *dst, int dstStep, int dstOffset, int dstCols)
|
||||
{
|
||||
const int x = get_group_id(0) * get_local_size(0) + get_local_id(0);
|
||||
const int y = get_group_id(1);
|
||||
|
||||
__local float4 smem[256 + 4];
|
||||
|
||||
float4 sum;
|
||||
|
||||
const int src_y = 2*y;
|
||||
const int last_row = srcRows - 1;
|
||||
const int last_col = srcCols - 1;
|
||||
|
||||
float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f);
|
||||
float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f);
|
||||
float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f);
|
||||
|
||||
sum = 0;
|
||||
|
||||
sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(x, last_col)];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(x, last_col)];
|
||||
sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(x, last_col)];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(x, last_col)];
|
||||
sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(x, last_col)];
|
||||
|
||||
smem[2 + get_local_id(0)] = sum;
|
||||
|
||||
if (get_local_id(0) < 2)
|
||||
{
|
||||
const int left_x = x - 2;
|
||||
|
||||
sum = 0;
|
||||
|
||||
sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)];
|
||||
sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(left_x, last_col)];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)];
|
||||
sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)];
|
||||
|
||||
smem[get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
if (get_local_id(0) > 253)
|
||||
{
|
||||
const int right_x = x + 2;
|
||||
|
||||
sum = 0;
|
||||
|
||||
sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)];
|
||||
sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(right_x, last_col)];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)];
|
||||
sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)];
|
||||
|
||||
smem[4 + get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (get_local_id(0) < 128)
|
||||
{
|
||||
const int tid2 = get_local_id(0) * 2;
|
||||
|
||||
sum = 0;
|
||||
|
||||
sum = sum + co3 * smem[2 + tid2 - 2];
|
||||
sum = sum + co2 * smem[2 + tid2 - 1];
|
||||
sum = sum + co1 * smem[2 + tid2 ];
|
||||
sum = sum + co2 * smem[2 + tid2 + 1];
|
||||
sum = sum + co3 * smem[2 + tid2 + 2];
|
||||
|
||||
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
|
||||
|
||||
if (dst_x < dstCols)
|
||||
dst[y * dstStep / 16 + dst_x] = sum;
|
||||
}
|
||||
}
|
750
modules/ocl/src/kernels/pyr_up.cl
Normal file
750
modules/ocl/src/kernels/pyr_up.cl
Normal file
@@ -0,0 +1,750 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Zhang Chunpeng chunpeng@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other oclMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
//#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||
|
||||
uchar get_valid_uchar(uchar data)
|
||||
{
|
||||
return (uchar)(data <= 255 ? data : data > 0 ? 255 : 0);
|
||||
}
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
////////////////////////// CV_8UC1 //////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
__kernel void pyrUp_C1_D0(__global uchar* src,__global uchar* dst,
|
||||
int srcRows,int dstRows,int srcCols,int dstCols,
|
||||
int srcOffset,int dstOffset,int srcStep,int dstStep)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
|
||||
__local float s_srcPatch[10][10];
|
||||
__local float s_dstPatch[20][16];
|
||||
|
||||
|
||||
if( get_local_id(0) < 10 && get_local_id(1) < 10 )
|
||||
{
|
||||
int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1;
|
||||
int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1;
|
||||
|
||||
srcx = abs(srcx);
|
||||
srcx = min(srcCols - 1,srcx);
|
||||
|
||||
srcy = abs(srcy);
|
||||
srcy = min(srcRows -1 ,srcy);
|
||||
|
||||
s_srcPatch[get_local_id(1)][get_local_id(0)] = (float)(src[srcx + srcy * srcStep]);
|
||||
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
float sum = 0;
|
||||
const int evenFlag = (int)((get_local_id(0) & 1) == 0);
|
||||
const int oddFlag = (int)((get_local_id(0) & 1) != 0);
|
||||
const bool eveny = ((get_local_id(1) & 1) == 0);
|
||||
const int tidx = get_local_id(0);
|
||||
|
||||
if(eveny)
|
||||
{
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)];
|
||||
}
|
||||
|
||||
s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum;
|
||||
|
||||
if (get_local_id(1) < 2)
|
||||
{
|
||||
sum = 0;
|
||||
|
||||
if (eveny)
|
||||
{
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx ) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
|
||||
}
|
||||
|
||||
s_dstPatch[get_local_id(1)][get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
if (get_local_id(1) > 13)
|
||||
{
|
||||
sum = 0;
|
||||
|
||||
if (eveny)
|
||||
{
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx ) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)];
|
||||
}
|
||||
s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
sum = 0;
|
||||
|
||||
const int tidy = get_local_id(1);
|
||||
|
||||
sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][get_local_id(0)];
|
||||
sum = sum + 0.25f * s_dstPatch[2 + tidy - 1][get_local_id(0)];
|
||||
sum = sum + 0.375f * s_dstPatch[2 + tidy ][get_local_id(0)];
|
||||
sum = sum + 0.25f * s_dstPatch[2 + tidy + 1][get_local_id(0)];
|
||||
sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)];
|
||||
|
||||
if ((x < dstCols) && (y < dstRows))
|
||||
dst[x + y * dstStep] = (float)(4.0f * sum);
|
||||
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
////////////////////////// CV_16UC1 /////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
__kernel void pyrUp_C1_D2(__global ushort* src,__global ushort* dst,
|
||||
int srcRows,int dstRows,int srcCols,int dstCols,
|
||||
int srcOffset,int dstOffset,int srcStep,int dstStep)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
|
||||
__local float s_srcPatch[10][10];
|
||||
__local float s_dstPatch[20][16];
|
||||
|
||||
srcStep = srcStep >> 1;
|
||||
dstStep = dstStep >> 1;
|
||||
srcOffset = srcOffset >> 1;
|
||||
dstOffset = dstOffset >> 1;
|
||||
|
||||
|
||||
if( get_local_id(0) < 10 && get_local_id(1) < 10 )
|
||||
{
|
||||
int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1;
|
||||
int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1;
|
||||
|
||||
srcx = abs(srcx);
|
||||
srcx = min(srcCols - 1,srcx);
|
||||
|
||||
srcy = abs(srcy);
|
||||
srcy = min(srcRows -1 ,srcy);
|
||||
|
||||
s_srcPatch[get_local_id(1)][get_local_id(0)] = (float)(src[srcx + srcy * srcStep]);
|
||||
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
float sum = 0;
|
||||
|
||||
const int evenFlag = (int)((get_local_id(0) & 1) == 0);
|
||||
const int oddFlag = (int)((get_local_id(0) & 1) != 0);
|
||||
const bool eveny = ((get_local_id(1) & 1) == 0);
|
||||
const int tidx = get_local_id(0);
|
||||
|
||||
if(eveny)
|
||||
{
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)];
|
||||
}
|
||||
|
||||
s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum;
|
||||
|
||||
if (get_local_id(1) < 2)
|
||||
{
|
||||
sum = 0;
|
||||
|
||||
if (eveny)
|
||||
{
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx ) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
|
||||
}
|
||||
|
||||
s_dstPatch[get_local_id(1)][get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
if (get_local_id(1) > 13)
|
||||
{
|
||||
sum = 0;
|
||||
|
||||
if (eveny)
|
||||
{
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx ) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)];
|
||||
}
|
||||
s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
sum = 0;
|
||||
|
||||
const int tidy = get_local_id(1);
|
||||
|
||||
sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][get_local_id(0)];
|
||||
sum = sum + 0.25f * s_dstPatch[2 + tidy - 1][get_local_id(0)];
|
||||
sum = sum + 0.375f * s_dstPatch[2 + tidy ][get_local_id(0)];
|
||||
sum = sum + 0.25f * s_dstPatch[2 + tidy + 1][get_local_id(0)];
|
||||
sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)];
|
||||
|
||||
if ((x < dstCols) && (y < dstRows))
|
||||
dst[x + y * dstStep] = (float)(4.0f * sum);
|
||||
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
////////////////////////// CV_32FC1 /////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
__kernel void pyrUp_C1_D5(__global float* src,__global float* dst,
|
||||
int srcRows,int dstRows,int srcCols,int dstCols,
|
||||
int srcOffset,int dstOffset,int srcStep,int dstStep)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
|
||||
__local float s_srcPatch[10][10];
|
||||
__local float s_dstPatch[20][16];
|
||||
|
||||
srcOffset = srcOffset >> 2;
|
||||
dstOffset = dstOffset >> 2;
|
||||
srcStep = srcStep >> 2;
|
||||
dstStep = dstStep >> 2;
|
||||
|
||||
|
||||
if( get_local_id(0) < 10 && get_local_id(1) < 10 )
|
||||
{
|
||||
int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1;
|
||||
int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1;
|
||||
|
||||
srcx = abs(srcx);
|
||||
srcx = min(srcCols - 1,srcx);
|
||||
|
||||
srcy = abs(srcy);
|
||||
srcy = min(srcRows -1 ,srcy);
|
||||
|
||||
s_srcPatch[get_local_id(1)][get_local_id(0)] = (float)(src[srcx + srcy * srcStep]);
|
||||
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
float sum = 0;
|
||||
const int evenFlag = (int)((get_local_id(0) & 1) == 0);
|
||||
const int oddFlag = (int)((get_local_id(0) & 1) != 0);
|
||||
const bool eveny = ((get_local_id(1) & 1) == 0);
|
||||
const int tidx = get_local_id(0);
|
||||
|
||||
if(eveny)
|
||||
{
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)];
|
||||
}
|
||||
|
||||
s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum;
|
||||
|
||||
if (get_local_id(1) < 2)
|
||||
{
|
||||
sum = 0;
|
||||
|
||||
if (eveny)
|
||||
{
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx ) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
|
||||
}
|
||||
|
||||
s_dstPatch[get_local_id(1)][get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
if (get_local_id(1) > 13)
|
||||
{
|
||||
sum = 0;
|
||||
|
||||
if (eveny)
|
||||
{
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx ) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)];
|
||||
}
|
||||
s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
sum = 0;
|
||||
|
||||
const int tidy = get_local_id(1);
|
||||
|
||||
sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][get_local_id(0)];
|
||||
sum = sum + 0.25f * s_dstPatch[2 + tidy - 1][get_local_id(0)];
|
||||
sum = sum + 0.375f * s_dstPatch[2 + tidy ][get_local_id(0)];
|
||||
sum = sum + 0.25f * s_dstPatch[2 + tidy + 1][get_local_id(0)];
|
||||
sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)];
|
||||
|
||||
if ((x < dstCols) && (y < dstRows))
|
||||
dst[x + y * dstStep] = (float)(4.0f * sum);
|
||||
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
////////////////////////// CV_8UC4 //////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
float4 covert_uchar4_to_float4(uchar4 data)
|
||||
{
|
||||
float4 f4Data = {0,0,0,0};
|
||||
|
||||
f4Data.x = (float)data.x;
|
||||
f4Data.y = (float)data.y;
|
||||
f4Data.z = (float)data.z;
|
||||
f4Data.w = (float)data.w;
|
||||
|
||||
return f4Data;
|
||||
}
|
||||
|
||||
|
||||
uchar4 convert_float4_to_uchar4(float4 data)
|
||||
{
|
||||
uchar4 u4Data;
|
||||
|
||||
u4Data.x = get_valid_uchar(data.x);
|
||||
u4Data.y = get_valid_uchar(data.y);
|
||||
u4Data.z = get_valid_uchar(data.z);
|
||||
u4Data.w = get_valid_uchar(data.w);
|
||||
|
||||
return u4Data;
|
||||
}
|
||||
|
||||
float4 int_x_float4(int leftOpr,float4 rightOpr)
|
||||
{
|
||||
float4 result = {0,0,0,0};
|
||||
|
||||
result.x = rightOpr.x * leftOpr;
|
||||
result.y = rightOpr.y * leftOpr;
|
||||
result.z = rightOpr.z * leftOpr;
|
||||
result.w = rightOpr.w * leftOpr;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
float4 float4_x_float4(float4 leftOpr,float4 rightOpr)
|
||||
{
|
||||
float4 result;
|
||||
|
||||
result.x = leftOpr.x * rightOpr.x;
|
||||
result.y = leftOpr.y * rightOpr.y;
|
||||
result.z = leftOpr.z * rightOpr.z;
|
||||
result.w = leftOpr.w * rightOpr.w;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
__kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst,
|
||||
int srcRows,int dstRows,int srcCols,int dstCols,
|
||||
int srcOffset,int dstOffset,int srcStep,int dstStep)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
|
||||
__local float4 s_srcPatch[10][10];
|
||||
__local float4 s_dstPatch[20][16];
|
||||
|
||||
srcOffset >>= 2;
|
||||
dstOffset >>= 2;
|
||||
srcStep >>= 2;
|
||||
dstStep >>= 2;
|
||||
|
||||
|
||||
if( get_local_id(0) < 10 && get_local_id(1) < 10 )
|
||||
{
|
||||
int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1;
|
||||
int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1;
|
||||
|
||||
srcx = abs(srcx);
|
||||
srcx = min(srcCols - 1,srcx);
|
||||
|
||||
srcy = abs(srcy);
|
||||
srcy = min(srcRows -1 ,srcy);
|
||||
|
||||
s_srcPatch[get_local_id(1)][get_local_id(0)] = covert_uchar4_to_float4(src[srcx + srcy * srcStep]);
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
float4 sum = (float4)(0,0,0,0);
|
||||
|
||||
const int evenFlag = (int)((get_local_id(0) & 1) == 0);
|
||||
const int oddFlag = (int)((get_local_id(0) & 1) != 0);
|
||||
const bool eveny = ((get_local_id(1) & 1) == 0);
|
||||
const int tidx = get_local_id(0);
|
||||
|
||||
float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f);
|
||||
float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f);
|
||||
float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f);
|
||||
|
||||
|
||||
if(eveny)
|
||||
{
|
||||
sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4( evenFlag, co1 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)]);
|
||||
|
||||
}
|
||||
|
||||
s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum;
|
||||
|
||||
if (get_local_id(1) < 2)
|
||||
{
|
||||
sum = 0;
|
||||
|
||||
if (eveny)
|
||||
{
|
||||
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx - 2) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx - 1) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4(evenFlag , co1 ) , s_srcPatch[0][1 + ((tidx ) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx + 1) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx + 2) >> 1)]);
|
||||
}
|
||||
|
||||
s_dstPatch[get_local_id(1)][get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
if (get_local_id(1) > 13)
|
||||
{
|
||||
sum = 0;
|
||||
|
||||
if (eveny)
|
||||
{
|
||||
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx - 2) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx - 1) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4(evenFlag , co1) , s_srcPatch[9][1 + ((tidx ) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx + 1) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx + 2) >> 1)]);
|
||||
|
||||
}
|
||||
s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
sum = 0;
|
||||
|
||||
const int tidy = get_local_id(1);
|
||||
|
||||
sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy - 2][get_local_id(0)]);
|
||||
sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy - 1][get_local_id(0)]);
|
||||
sum = sum + float4_x_float4(co1 , s_dstPatch[2 + tidy ][get_local_id(0)]);
|
||||
sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy + 1][get_local_id(0)]);
|
||||
sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy + 2][get_local_id(0)]);
|
||||
|
||||
if ((x < dstCols) && (y < dstRows))
|
||||
{
|
||||
dst[x + y * dstStep] = convert_float4_to_uchar4(int_x_float4(4.0f,sum));
|
||||
}
|
||||
}
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
////////////////////////// CV_16UC4 //////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
float4 covert_ushort4_to_float4(ushort4 data)
|
||||
{
|
||||
float4 f4Data = {0,0,0,0};
|
||||
|
||||
f4Data.x = (float)data.x;
|
||||
f4Data.y = (float)data.y;
|
||||
f4Data.z = (float)data.z;
|
||||
f4Data.w = (float)data.w;
|
||||
|
||||
return f4Data;
|
||||
}
|
||||
|
||||
|
||||
ushort4 convert_float4_to_ushort4(float4 data)
|
||||
{
|
||||
ushort4 u4Data;
|
||||
|
||||
u4Data.x = (float)data.x;
|
||||
u4Data.y = (float)data.y;
|
||||
u4Data.z = (float)data.z;
|
||||
u4Data.w = (float)data.w;
|
||||
|
||||
return u4Data;
|
||||
}
|
||||
|
||||
|
||||
__kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst,
|
||||
int srcRows,int dstRows,int srcCols,int dstCols,
|
||||
int srcOffset,int dstOffset,int srcStep,int dstStep)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
|
||||
__local float4 s_srcPatch[10][10];
|
||||
__local float4 s_dstPatch[20][16];
|
||||
|
||||
srcOffset >>= 3;
|
||||
dstOffset >>= 3;
|
||||
srcStep >>= 3;
|
||||
dstStep >>= 3;
|
||||
|
||||
|
||||
if( get_local_id(0) < 10 && get_local_id(1) < 10 )
|
||||
{
|
||||
int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1;
|
||||
int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1;
|
||||
|
||||
srcx = abs(srcx);
|
||||
srcx = min(srcCols - 1,srcx);
|
||||
|
||||
srcy = abs(srcy);
|
||||
srcy = min(srcRows -1 ,srcy);
|
||||
|
||||
s_srcPatch[get_local_id(1)][get_local_id(0)] = covert_ushort4_to_float4(src[srcx + srcy * srcStep]);
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
float4 sum = (float4)(0,0,0,0);
|
||||
|
||||
const int evenFlag = (int)((get_local_id(0) & 1) == 0);
|
||||
const int oddFlag = (int)((get_local_id(0) & 1) != 0);
|
||||
const bool eveny = ((get_local_id(1) & 1) == 0);
|
||||
const int tidx = get_local_id(0);
|
||||
|
||||
float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f);
|
||||
float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f);
|
||||
float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f);
|
||||
|
||||
|
||||
if(eveny)
|
||||
{
|
||||
sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4( evenFlag, co1 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)]);
|
||||
|
||||
}
|
||||
|
||||
s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum;
|
||||
|
||||
if (get_local_id(1) < 2)
|
||||
{
|
||||
sum = 0;
|
||||
|
||||
if (eveny)
|
||||
{
|
||||
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx - 2) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx - 1) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4(evenFlag , co1 ) , s_srcPatch[0][1 + ((tidx ) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx + 1) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx + 2) >> 1)]);
|
||||
}
|
||||
|
||||
s_dstPatch[get_local_id(1)][get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
if (get_local_id(1) > 13)
|
||||
{
|
||||
sum = 0;
|
||||
|
||||
if (eveny)
|
||||
{
|
||||
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx - 2) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx - 1) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4(evenFlag , co1) , s_srcPatch[9][1 + ((tidx ) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx + 1) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx + 2) >> 1)]);
|
||||
|
||||
}
|
||||
s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
sum = 0;
|
||||
|
||||
const int tidy = get_local_id(1);
|
||||
|
||||
sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy - 2][get_local_id(0)]);
|
||||
sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy - 1][get_local_id(0)]);
|
||||
sum = sum + float4_x_float4(co1 , s_dstPatch[2 + tidy ][get_local_id(0)]);
|
||||
sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy + 1][get_local_id(0)]);
|
||||
sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy + 2][get_local_id(0)]);
|
||||
|
||||
if ((x < dstCols) && (y < dstRows))
|
||||
{
|
||||
dst[x + y * dstStep] = convert_float4_to_ushort4(int_x_float4(4.0f,sum));
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
////////////////////////// CV_32FC4 //////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
__kernel void pyrUp_C4_D5(__global float4* src,__global float4* dst,
|
||||
int srcRows,int dstRows,int srcCols,int dstCols,
|
||||
int srcOffset,int dstOffset,int srcStep,int dstStep)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
|
||||
__local float4 s_srcPatch[10][10];
|
||||
__local float4 s_dstPatch[20][16];
|
||||
|
||||
srcOffset >>= 4;
|
||||
dstOffset >>= 4;
|
||||
srcStep >>= 4;
|
||||
dstStep >>= 4;
|
||||
|
||||
|
||||
if( get_local_id(0) < 10 && get_local_id(1) < 10 )
|
||||
{
|
||||
int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1;
|
||||
int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1;
|
||||
|
||||
srcx = abs(srcx);
|
||||
srcx = min(srcCols - 1,srcx);
|
||||
|
||||
srcy = abs(srcy);
|
||||
srcy = min(srcRows -1 ,srcy);
|
||||
|
||||
s_srcPatch[get_local_id(1)][get_local_id(0)] = (float4)(src[srcx + srcy * srcStep]);
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
float4 sum = (float4)(0,0,0,0);
|
||||
|
||||
const int evenFlag = (int)((get_local_id(0) & 1) == 0);
|
||||
const int oddFlag = (int)((get_local_id(0) & 1) != 0);
|
||||
const bool eveny = ((get_local_id(1) & 1) == 0);
|
||||
const int tidx = get_local_id(0);
|
||||
|
||||
float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f);
|
||||
float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f);
|
||||
float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f);
|
||||
|
||||
|
||||
if(eveny)
|
||||
{
|
||||
sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4( evenFlag, co1 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)]);
|
||||
|
||||
}
|
||||
|
||||
s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum;
|
||||
|
||||
if (get_local_id(1) < 2)
|
||||
{
|
||||
sum = 0;
|
||||
|
||||
if (eveny)
|
||||
{
|
||||
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx - 2) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx - 1) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4(evenFlag , co1 ) , s_srcPatch[0][1 + ((tidx ) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx + 1) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx + 2) >> 1)]);
|
||||
}
|
||||
|
||||
s_dstPatch[get_local_id(1)][get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
if (get_local_id(1) > 13)
|
||||
{
|
||||
sum = 0;
|
||||
|
||||
if (eveny)
|
||||
{
|
||||
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx - 2) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx - 1) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4(evenFlag , co1) , s_srcPatch[9][1 + ((tidx ) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx + 1) >> 1)]);
|
||||
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx + 2) >> 1)]);
|
||||
|
||||
}
|
||||
s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
sum = 0;
|
||||
|
||||
const int tidy = get_local_id(1);
|
||||
|
||||
sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy - 2][get_local_id(0)]);
|
||||
sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy - 1][get_local_id(0)]);
|
||||
sum = sum + float4_x_float4(co1 , s_dstPatch[2 + tidy ][get_local_id(0)]);
|
||||
sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy + 1][get_local_id(0)]);
|
||||
sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy + 2][get_local_id(0)]);
|
||||
|
||||
if ((x < dstCols) && (y < dstRows))
|
||||
{
|
||||
dst[x + y * dstStep] = 4.0f * sum;
|
||||
}
|
||||
}
|
560
modules/ocl/src/match_template.cpp
Normal file
560
modules/ocl/src/match_template.cpp
Normal file
@@ -0,0 +1,560 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Peng Xiao, pengxiao@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other oclMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
|
||||
#include <iomanip>
|
||||
#include "precomp.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::ocl;
|
||||
using namespace std;
|
||||
|
||||
#define EXT_FP64 0
|
||||
|
||||
#if !defined (HAVE_OPENCL)
|
||||
void cv::ocl::matchTemplate(const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); }
|
||||
#else
|
||||
//helper routines
|
||||
namespace cv
|
||||
{
|
||||
namespace ocl
|
||||
{
|
||||
///////////////////////////OpenCL kernel strings///////////////////////////
|
||||
extern const char *match_template;
|
||||
}
|
||||
}
|
||||
|
||||
namespace cv { namespace ocl
|
||||
{
|
||||
void matchTemplate_SQDIFF(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
|
||||
|
||||
void matchTemplate_SQDIFF_NORMED(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
|
||||
|
||||
void matchTemplate_CCORR(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
|
||||
|
||||
void matchTemplate_CCORR_NORMED(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
|
||||
|
||||
void matchTemplate_CCOFF(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
|
||||
|
||||
void matchTemplate_CCOFF_NORMED(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
|
||||
|
||||
|
||||
void matchTemplateNaive_SQDIFF(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, int cn);
|
||||
|
||||
void matchTemplateNaive_CCORR(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, int cn);
|
||||
|
||||
// Evaluates optimal template's area threshold. If
|
||||
// template's area is less than the threshold, we use naive match
|
||||
// template version, otherwise FFT-based (if available)
|
||||
int getTemplateThreshold(int method, int depth)
|
||||
{
|
||||
switch (method)
|
||||
{
|
||||
case CV_TM_CCORR:
|
||||
if (depth == CV_32F) return 250;
|
||||
if (depth == CV_8U) return 300;
|
||||
break;
|
||||
case CV_TM_SQDIFF:
|
||||
if (depth == CV_32F) return MAXSHORT; // do naive SQDIFF for CV_32F
|
||||
if (depth == CV_8U) return 300;
|
||||
break;
|
||||
}
|
||||
CV_Error(CV_StsBadArg, "getTemplateThreshold: unsupported match template mode");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// SQDIFF
|
||||
void matchTemplate_SQDIFF(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
|
||||
{
|
||||
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
|
||||
if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
|
||||
{
|
||||
matchTemplateNaive_SQDIFF(image, templ, result, image.channels());
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
// TODO
|
||||
CV_Error(CV_StsBadArg, "Not supported yet for this size template");
|
||||
}
|
||||
}
|
||||
|
||||
void matchTemplate_SQDIFF_NORMED(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
|
||||
{
|
||||
matchTemplate_CCORR(image,templ,result,buf);
|
||||
buf.image_sums.resize(1);
|
||||
buf.image_sqsums.resize(1);
|
||||
|
||||
integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
|
||||
|
||||
#if EXT_FP64 && SQRSUM_FIXED
|
||||
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
|
||||
#else
|
||||
Mat sqr_mat = templ.reshape(1);
|
||||
unsigned long long templ_sqsum = (unsigned long long)sum(sqr_mat.mul(sqr_mat))[0];
|
||||
#endif
|
||||
|
||||
Context *clCxt = image.clCxt;
|
||||
string kernelName = "matchTemplate_Prepared_SQDIFF_NORMED";
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
|
||||
args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||
|
||||
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
||||
size_t localThreads[3] = {32, 8, 1};
|
||||
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
|
||||
}
|
||||
|
||||
void matchTemplateNaive_SQDIFF(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, int cn)
|
||||
{
|
||||
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
|
||||
|| (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F);
|
||||
CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
|
||||
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
|
||||
|
||||
Context *clCxt = image.clCxt;
|
||||
string kernelName = "matchTemplate_Naive_SQDIFF";
|
||||
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||
|
||||
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
||||
size_t localThreads[3] = {32, 8, 1};
|
||||
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// CCORR
|
||||
void matchTemplate_CCORR(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
|
||||
{
|
||||
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
|
||||
if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
|
||||
{
|
||||
matchTemplateNaive_CCORR(image, templ, result, image.channels());
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Error(CV_StsBadArg, "Not supported yet for this size template");
|
||||
if(image.depth() == CV_8U && templ.depth() == CV_8U)
|
||||
{
|
||||
image.convertTo(buf.imagef, CV_32F);
|
||||
templ.convertTo(buf.templf, CV_32F);
|
||||
}
|
||||
CV_Assert(image.channels() == 1);
|
||||
oclMat o_result(image.size(), CV_MAKETYPE(CV_32F, image.channels()));
|
||||
filter2D(buf.imagef,o_result,CV_32F,buf.templf, Point(0,0));
|
||||
result = o_result(Rect(0,0,image.rows - templ.rows + 1, image.cols - templ.cols + 1));
|
||||
}
|
||||
}
|
||||
|
||||
void matchTemplate_CCORR_NORMED(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
|
||||
{
|
||||
matchTemplate_CCORR(image,templ,result,buf);
|
||||
buf.image_sums.resize(1);
|
||||
buf.image_sqsums.resize(1);
|
||||
|
||||
integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
|
||||
#if EXT_FP64 && SQRSUM_FIXED
|
||||
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
|
||||
#elif EXT_FP64
|
||||
oclMat templ_c1 = templ.reshape(1);
|
||||
multiply(templ_c1, templ_c1, templ_c1);
|
||||
unsigned long long templ_sqsum = (unsigned long long)sum(templ_c1)[0];
|
||||
#else
|
||||
Mat m_templ_c1 = templ.reshape(1);
|
||||
multiply(m_templ_c1, m_templ_c1, m_templ_c1);
|
||||
unsigned long long templ_sqsum = (unsigned long long)sum(m_templ_c1)[0];
|
||||
#endif
|
||||
Context *clCxt = image.clCxt;
|
||||
string kernelName = "normalizeKernel";
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
|
||||
args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||
|
||||
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
||||
size_t localThreads[3] = {32, 8, 1};
|
||||
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
|
||||
}
|
||||
|
||||
void matchTemplateNaive_CCORR(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, int cn)
|
||||
{
|
||||
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
|
||||
|| (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F);
|
||||
CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
|
||||
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
|
||||
|
||||
Context *clCxt = image.clCxt;
|
||||
string kernelName = "matchTemplate_Naive_CCORR";
|
||||
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||
|
||||
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
||||
size_t localThreads[3] = {32, 8, 1};
|
||||
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// CCOFF
|
||||
void matchTemplate_CCOFF(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
|
||||
{
|
||||
CV_Assert(image.depth() == CV_8U && templ.depth() == CV_8U);
|
||||
|
||||
matchTemplate_CCORR(image,templ,result,buf);
|
||||
|
||||
Context *clCxt = image.clCxt;
|
||||
string kernelName;
|
||||
|
||||
kernelName = "matchTemplate_Prepared_CCOFF";
|
||||
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
||||
size_t localThreads[3] = {32, 8, 1};
|
||||
|
||||
vector< pair<size_t, const void *> > args;
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||
// to be continued in the following section
|
||||
if(image.channels() == 1)
|
||||
{
|
||||
buf.image_sums.resize(1);
|
||||
// FIXME: temp fix for incorrect integral kernel
|
||||
oclMat tmp_oclmat;
|
||||
integral(image, buf.image_sums[0], tmp_oclmat);
|
||||
|
||||
float templ_sum = 0;
|
||||
#if EXT_FP64
|
||||
templ_sum = (float)sum(templ)[0] / templ.size().area();
|
||||
#else
|
||||
Mat o_templ = templ;
|
||||
templ_sum = (float)sum(o_templ)[0] / o_templ.size().area(); // temp fix for non-double supported machine
|
||||
#endif
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) );
|
||||
}
|
||||
else
|
||||
{
|
||||
Vec4f templ_sum = Vec4f::all(0);
|
||||
#if EXT_FP64
|
||||
split(image,buf.images);
|
||||
templ_sum = sum(templ) / templ.size().area();
|
||||
#else
|
||||
// temp fix for non-double supported machine
|
||||
Mat o_templ = templ, o_image = image;
|
||||
vector<Mat> o_mat_vector;
|
||||
o_mat_vector.resize(image.channels());
|
||||
buf.images.resize(image.channels());
|
||||
split(o_image, o_mat_vector);
|
||||
for(int i = 0; i < o_mat_vector.size(); i ++)
|
||||
{
|
||||
buf.images[i] = oclMat(o_mat_vector[i]);
|
||||
}
|
||||
templ_sum = sum(o_templ) / templ.size().area();
|
||||
#endif
|
||||
buf.image_sums.resize(buf.images.size());
|
||||
|
||||
for(int i = 0; i < image.channels(); i ++)
|
||||
{
|
||||
// FIXME: temp fix for incorrect integral kernel
|
||||
oclMat omat_temp;
|
||||
integral(buf.images[i], buf.image_sums[i], omat_temp);
|
||||
}
|
||||
switch(image.channels())
|
||||
{
|
||||
case 4:
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) );
|
||||
break;
|
||||
default:
|
||||
CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
|
||||
break;
|
||||
}
|
||||
}
|
||||
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
|
||||
}
|
||||
|
||||
void matchTemplate_CCOFF_NORMED(
|
||||
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
|
||||
{
|
||||
image.convertTo(buf.imagef, CV_32F);
|
||||
templ.convertTo(buf.templf, CV_32F);
|
||||
|
||||
matchTemplate_CCORR(buf.imagef, buf.templf, result, buf);
|
||||
float scale = 1.f/templ.size().area();
|
||||
|
||||
Context *clCxt = image.clCxt;
|
||||
string kernelName;
|
||||
|
||||
kernelName = "matchTemplate_Prepared_CCOFF_NORMED";
|
||||
size_t globalThreads[3] = {result.cols, result.rows, 1};
|
||||
size_t localThreads[3] = {32, 8, 1};
|
||||
|
||||
vector< pair<size_t, const void *> > args;
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&scale) );
|
||||
// to be continued in the following section
|
||||
if(image.channels() == 1)
|
||||
{
|
||||
buf.image_sums.resize(1);
|
||||
buf.image_sqsums.resize(1);
|
||||
integral(image, buf.image_sums[0], buf.image_sqsums[0]);
|
||||
float templ_sum = 0;
|
||||
float templ_sqsum = 0;
|
||||
#if EXT_FP64
|
||||
templ_sum = (float)sum(templ)[0];
|
||||
#if SQRSUM_FIXED
|
||||
templ_sqsum = sqrSum(templ);
|
||||
#else
|
||||
oclMat templ_sqr = templ;
|
||||
multiply(templ,templ, templ_sqr);
|
||||
templ_sqsum = sum(templ_sqr)[0];
|
||||
#endif //SQRSUM_FIXED
|
||||
templ_sqsum -= scale * templ_sum * templ_sum;
|
||||
templ_sum *= scale;
|
||||
#else
|
||||
// temp fix for non-double supported machine
|
||||
Mat o_templ = templ;
|
||||
templ_sum = (float)sum(o_templ)[0];
|
||||
templ_sqsum = sum(o_templ.mul(o_templ))[0] - scale * templ_sum * templ_sum;
|
||||
templ_sum *= scale;
|
||||
#endif
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum) );
|
||||
}
|
||||
else
|
||||
{
|
||||
Vec4f templ_sum = Vec4f::all(0);
|
||||
Vec4f templ_sqsum = Vec4f::all(0);
|
||||
#if EXT_FP64
|
||||
split(image,buf.images);
|
||||
templ_sum = sum(templ);
|
||||
#if SQRSUM_FIXED
|
||||
templ_sqsum = sqrSum(templ);
|
||||
#else
|
||||
oclMat templ_sqr = templ;
|
||||
multiply(templ,templ, templ_sqr);
|
||||
templ_sqsum = sum(templ_sqr);
|
||||
#endif //SQRSUM_FIXED
|
||||
templ_sqsum -= scale * templ_sum * templ_sum;
|
||||
|
||||
#else
|
||||
// temp fix for non-double supported machine
|
||||
Mat o_templ = templ, o_image = image;
|
||||
|
||||
vector<Mat> o_mat_vector;
|
||||
o_mat_vector.resize(image.channels());
|
||||
buf.images.resize(image.channels());
|
||||
split(o_image, o_mat_vector);
|
||||
for(int i = 0; i < o_mat_vector.size(); i ++)
|
||||
{
|
||||
buf.images[i] = oclMat(o_mat_vector[i]);
|
||||
}
|
||||
templ_sum = sum(o_templ);
|
||||
templ_sqsum = sum(o_templ.mul(o_templ));
|
||||
#endif
|
||||
float templ_sqsum_sum = 0;
|
||||
for(int i = 0; i < image.channels(); i ++)
|
||||
{
|
||||
templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
|
||||
}
|
||||
templ_sum *= scale;
|
||||
buf.image_sums.resize(buf.images.size());
|
||||
buf.image_sqsums.resize(buf.images.size());
|
||||
|
||||
for(int i = 0; i < image.channels(); i ++)
|
||||
{
|
||||
integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]);
|
||||
}
|
||||
|
||||
switch(image.channels())
|
||||
{
|
||||
case 4:
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[1].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[2].data) );
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[3].data) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) );
|
||||
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum_sum) );
|
||||
break;
|
||||
default:
|
||||
CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
|
||||
break;
|
||||
}
|
||||
}
|
||||
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
|
||||
}
|
||||
|
||||
}/*ocl*/} /*cv*/
|
||||
|
||||
void cv::ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method)
|
||||
{
|
||||
MatchTemplateBuf buf;
|
||||
matchTemplate(image,templ, result, method,buf);
|
||||
}
|
||||
void cv::ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method, MatchTemplateBuf& buf)
|
||||
{
|
||||
CV_Assert(image.type() == templ.type());
|
||||
CV_Assert(image.cols >= templ.cols && image.rows >= templ.rows);
|
||||
|
||||
typedef void (*Caller)(const oclMat&, const oclMat&, oclMat&, MatchTemplateBuf&);
|
||||
|
||||
const Caller callers[] = {
|
||||
::matchTemplate_SQDIFF, ::matchTemplate_SQDIFF_NORMED,
|
||||
::matchTemplate_CCORR, ::matchTemplate_CCORR_NORMED,
|
||||
::matchTemplate_CCOFF, ::matchTemplate_CCOFF_NORMED
|
||||
};
|
||||
|
||||
Caller caller = callers[method];
|
||||
CV_Assert(caller);
|
||||
caller(image, templ, result, buf);
|
||||
}
|
||||
#endif //
|
115
modules/ocl/src/pyrdown.cpp
Normal file
115
modules/ocl/src/pyrdown.cpp
Normal file
@@ -0,0 +1,115 @@
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::ocl;
|
||||
using namespace std;
|
||||
|
||||
using std::cout;
|
||||
using std::endl;
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace ocl
|
||||
{
|
||||
///////////////////////////OpenCL kernel strings///////////////////////////
|
||||
extern const char *pyr_down;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////// add subtract multiply divide /////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
template<typename T>
|
||||
void pyrdown_run(const oclMat &src, const oclMat &dst)
|
||||
{
|
||||
CV_Assert(src.cols / 2 == dst.cols && src.rows / 2 == dst.rows);
|
||||
|
||||
CV_Assert(src.type() == dst.type());
|
||||
CV_Assert(src.depth() != CV_8S);
|
||||
|
||||
Context *clCxt = src.clCxt;
|
||||
//int channels = dst.channels();
|
||||
//int depth = dst.depth();
|
||||
|
||||
string kernelName = "pyrDown";
|
||||
|
||||
//int vector_lengths[4][7] = {{4, 0, 4, 4, 1, 1, 1},
|
||||
// {4, 0, 4, 4, 1, 1, 1},
|
||||
// {4, 0, 4, 4, 1, 1, 1},
|
||||
// {4, 0, 4, 4, 1, 1, 1}
|
||||
//};
|
||||
|
||||
//size_t vector_length = vector_lengths[channels-1][depth];
|
||||
//int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1);
|
||||
|
||||
size_t localThreads[3] = { 256, 1, 1 };
|
||||
size_t globalThreads[3] = { src.cols, dst.rows, 1};
|
||||
|
||||
//int dst_step1 = dst.cols * dst.elemSize();
|
||||
vector<pair<size_t , const void *> > args;
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols));
|
||||
|
||||
openCLExecuteKernel(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
|
||||
}
|
||||
void pyrdown_run(const oclMat &src, const oclMat &dst)
|
||||
{
|
||||
switch(src.depth())
|
||||
{
|
||||
case 0:
|
||||
pyrdown_run<unsigned char>(src, dst);
|
||||
break;
|
||||
|
||||
case 1:
|
||||
pyrdown_run<char>(src, dst);
|
||||
break;
|
||||
|
||||
case 2:
|
||||
pyrdown_run<unsigned short>(src, dst);
|
||||
break;
|
||||
|
||||
case 3:
|
||||
pyrdown_run<short>(src, dst);
|
||||
break;
|
||||
|
||||
case 4:
|
||||
pyrdown_run<int>(src, dst);
|
||||
break;
|
||||
|
||||
case 5:
|
||||
pyrdown_run<float>(src, dst);
|
||||
break;
|
||||
|
||||
case 6:
|
||||
pyrdown_run<double>(src, dst);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// pyrDown
|
||||
|
||||
void cv::ocl::pyrDown(const oclMat& src, oclMat& dst)
|
||||
{
|
||||
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
|
||||
|
||||
//src.step = src.rows;
|
||||
|
||||
dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());
|
||||
|
||||
//dst.step = dst.rows;
|
||||
|
||||
pyrdown_run(src, dst);
|
||||
}
|
||||
|
88
modules/ocl/src/pyrup.cpp
Normal file
88
modules/ocl/src/pyrup.cpp
Normal file
@@ -0,0 +1,88 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Zhang Chunpeng chunpeng@multicorewareinc.com
|
||||
//
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other oclMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
/* Haar features calculation */
|
||||
//#define EMU
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::ocl;
|
||||
using namespace std;
|
||||
|
||||
#ifndef HAVE_OPENCL
|
||||
void cv::ocl::pyrUp(const oclMat&, GpuMat&, oclMat&) { throw_nogpu(); }
|
||||
#else
|
||||
|
||||
namespace cv { namespace ocl
|
||||
{
|
||||
extern const char *pyr_up;
|
||||
void pyrUp(const cv::ocl::oclMat& src,cv::ocl::oclMat& dst)
|
||||
{
|
||||
dst.create(src.rows * 2, src.cols * 2, src.type());
|
||||
Context *clCxt = src.clCxt;
|
||||
|
||||
const std::string kernelName = "pyrUp";
|
||||
|
||||
std::vector< pair<size_t, const void *> > args;
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step));
|
||||
|
||||
size_t globalThreads[3] = {dst.cols, dst.rows, 1};
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
|
||||
openCLExecuteKernel(clCxt, &pyr_up, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
|
||||
}
|
||||
}};
|
||||
#endif // HAVE_OPENCL
|
Reference in New Issue
Block a user