updated patch to bring in the first functions with "transparent API"
This commit is contained in:
307
modules/core/src/opencl/arithm.cl
Normal file
307
modules/core/src/opencl/arithm.cl
Normal file
@@ -0,0 +1,307 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Jia Haipeng, jiahaipeng95@gmail.com
|
||||
//
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the copyright holders or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
/*
|
||||
Usage:
|
||||
after compiling this program user gets a single kernel called KF.
|
||||
the following flags should be passed:
|
||||
1) one of "-D BINARY_OP", "-D UNARY_OP", "-D MASK_BINARY_OP" or "-D MASK_UNARY_OP"
|
||||
2) the actual operation performed, one of "-D OP_...", see below the list of operations.
|
||||
2a) "-D dstDepth=<destination depth> [-D cn=<num channels]"
|
||||
for some operations, like min/max/and/or/xor it's enough
|
||||
2b) "-D srcDepth1=<source1 depth> -D srcDepth2=<source2 depth> -D dstDepth=<destination depth>
|
||||
-D workDepth=<work depth> [-D cn=<num channels>]" - for mixed-type operations
|
||||
*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define CV_32S 4
|
||||
#define CV_32F 5
|
||||
|
||||
#define dstelem *(dstT*)(dstptr + dst_index)
|
||||
#define noconvert(x) x
|
||||
|
||||
#ifndef workT
|
||||
|
||||
#define srcT1 dstT
|
||||
#define srcT2 dstT
|
||||
#define workT dstT
|
||||
#define srcelem1 *(dstT*)(srcptr1 + src1_index)
|
||||
#define srcelem2 *(dstT*)(srcptr2 + src2_index)
|
||||
#define convertToDT noconvert
|
||||
|
||||
#else
|
||||
|
||||
#define srcelem1 convertToWT1(*(srcT1*)(srcptr1 + src1_index))
|
||||
#define srcelem2 convertToWT2(*(srcT2*)(srcptr2 + src2_index))
|
||||
|
||||
#endif
|
||||
|
||||
#define EXTRA_PARAMS
|
||||
|
||||
#if defined OP_ADD_SAT
|
||||
#define PROCESS_ELEM dstelem = add_sat(srcelem1, srcelem2)
|
||||
|
||||
#elif defined OP_ADD
|
||||
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 + srcelem2)
|
||||
|
||||
#elif defined OP_SUB_SAT
|
||||
#define PROCESS_ELEM dstelem = sub_sat(srcelem1, srcelem2)
|
||||
|
||||
#elif defined OP_SUB
|
||||
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 - srcelem2)
|
||||
|
||||
#elif defined OP_RSUB_SAT
|
||||
#define PROCESS_ELEM dstelem = sub_sat(srcelem2, srcelem1)
|
||||
|
||||
#elif defined OP_RSUB
|
||||
#define PROCESS_ELEM dstelem = convertToDT(srcelem2 - srcelem1)
|
||||
|
||||
#elif defined OP_ABSDIFF
|
||||
#define PROCESS_ELEM dstelem = abs_diff(srcelem1, srcelem2)
|
||||
|
||||
#elif defined OP_AND
|
||||
#define PROCESS_ELEM dstelem = srcelem1 & srcelem2
|
||||
|
||||
#elif defined OP_OR
|
||||
#define PROCESS_ELEM dstelem = srcelem1 | srcelem2
|
||||
|
||||
#elif defined OP_XOR
|
||||
#define PROCESS_ELEM dstelem = srcelem1 ^ srcelem2
|
||||
|
||||
#elif defined OP_NOT
|
||||
#define PROCESS_ELEM dstelem = ~srcelem1
|
||||
|
||||
#elif defined OP_MIN
|
||||
#define PROCESS_ELEM dstelem = min(srcelem1, srcelem2)
|
||||
|
||||
#elif defined OP_MAX
|
||||
#define PROCESS_ELEM dstelem = max(srcelem1, srcelem2)
|
||||
|
||||
#elif defined OP_MUL
|
||||
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 * srcelem2)
|
||||
|
||||
#elif defined OP_MUL_SCALE
|
||||
#undef EXTRA_PARAMS
|
||||
#define EXTRA_PARAMS , workT scale
|
||||
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 * srcelem2 * scale)
|
||||
|
||||
#elif defined OP_DIV
|
||||
#define PROCESS_ELEM \
|
||||
workT e2 = srcelem2, zero = (workT)(0); \
|
||||
dstelem = convertToDT(e2 != zero ? srcelem1 / e2 : zero)
|
||||
|
||||
#elif defined OP_DIV_SCALE
|
||||
#undef EXTRA_PARAMS
|
||||
#define EXTRA_PARAMS , workT scale
|
||||
#define PROCESS_ELEM \
|
||||
workT e2 = srcelem2, zero = (workT)(0); \
|
||||
dstelem = convertToDT(e2 != zero ? srcelem1 * scale / e2 : zero)
|
||||
|
||||
#elif defined OP_RECIP_SCALE
|
||||
#undef EXTRA_PARAMS
|
||||
#define EXTRA_PARAMS , workT scale
|
||||
#define PROCESS_ELEM \
|
||||
workT e1 = srcelem1, zero = (workT)(0); \
|
||||
dstelem = convertToDT(e1 != zero ? scale / e1 : zero)
|
||||
|
||||
#elif defined OP_ADDW
|
||||
#undef EXTRA_PARAMS
|
||||
#define EXTRA_PARAMS , workT alpha, workT beta, workT gamma
|
||||
#define PROCESS_ELEM dstelem = convertToDT(srcelem1*alpha + srcelem2*beta + gamma)
|
||||
|
||||
#elif defined OP_MAG
|
||||
#define PROCESS_ELEM dstelem = hypot(srcelem1, srcelem2)
|
||||
|
||||
#elif defined OP_PHASE_RADIANS
|
||||
#define PROCESS_ELEM \
|
||||
workT tmp = atan2(srcelem2, srcelem1); \
|
||||
if(tmp < 0) tmp += 6.283185307179586232; \
|
||||
dstelem = tmp
|
||||
|
||||
#elif defined OP_PHASE_DEGREES
|
||||
#define PROCESS_ELEM \
|
||||
workT tmp = atan2(srcelem2, srcelem1)*57.29577951308232286465; \
|
||||
if(tmp < 0) tmp += 360; \
|
||||
dstelem = tmp
|
||||
|
||||
#elif defined OP_EXP
|
||||
#define PROCESS_ELEM dstelem = exp(srcelem1)
|
||||
|
||||
#elif defined OP_SQRT
|
||||
#define PROCESS_ELEM dstelem = sqrt(srcelem1)
|
||||
|
||||
#elif defined OP_LOG
|
||||
#define PROCESS_ELEM dstelem = log(abs(srcelem1))
|
||||
|
||||
#elif defined OP_CMP
|
||||
#define PROCESS_ELEM dstelem = convert_uchar(srcelem1 CMP_OPERATOR srcelem2 ? 255 : 0)
|
||||
|
||||
#elif defined OP_CONVERT
|
||||
#define PROCESS_ELEM dstelem = convertToDT(srcelem1)
|
||||
|
||||
#elif defined OP_CONVERT_SCALE
|
||||
#undef EXTRA_PARAMS
|
||||
#define EXTRA_PARAMS , workT alpha, workT beta
|
||||
#define PROCESS_ELEM dstelem = convertToDT(srcelem1*alpha + beta)
|
||||
|
||||
#else
|
||||
#error "unknown op type"
|
||||
#endif
|
||||
|
||||
#if defined UNARY_OP || defined MASK_UNARY_OP
|
||||
#undef srcelem2
|
||||
#if defined OP_AND || defined OP_OR || defined OP_XOR || defined OP_ADD || defined OP_SAT_ADD || \
|
||||
defined OP_SUB || defined OP_SAT_SUB || defined OP_RSUB || defined OP_SAT_RSUB || \
|
||||
defined OP_ABSDIFF || defined OP_CMP || defined OP_MIN || defined OP_MAX
|
||||
#undef EXTRA_PARAMS
|
||||
#define EXTRA_PARAMS , workT srcelem2
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined BINARY_OP
|
||||
|
||||
__kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
|
||||
__global const uchar* srcptr2, int srcstep2, int srcoffset2,
|
||||
__global uchar* dstptr, int dststep, int dstoffset,
|
||||
int rows, int cols EXTRA_PARAMS )
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int src1_index = mad24(y, srcstep1, x*sizeof(srcT1) + srcoffset1);
|
||||
int src2_index = mad24(y, srcstep2, x*sizeof(srcT2) + srcoffset2);
|
||||
int dst_index = mad24(y, dststep, x*sizeof(dstT) + dstoffset);
|
||||
|
||||
PROCESS_ELEM;
|
||||
//printf("(x=%d, y=%d). %d, %d, %d\n", x, y, (int)srcelem1, (int)srcelem2, (int)dstelem);
|
||||
}
|
||||
}
|
||||
|
||||
#elif defined MASK_BINARY_OP
|
||||
|
||||
__kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
|
||||
__global const uchar* srcptr2, int srcstep2, int srcoffset2,
|
||||
__global const uchar* mask, int maskstep, int maskoffset,
|
||||
__global uchar* dstptr, int dststep, int dstoffset,
|
||||
int rows, int cols EXTRA_PARAMS )
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int mask_index = mad24(y, maskstep, x + maskoffset);
|
||||
if( mask[mask_index] )
|
||||
{
|
||||
int src1_index = mad24(y, srcstep1, x*sizeof(srcT1) + srcoffset1);
|
||||
int src2_index = mad24(y, srcstep2, x*sizeof(srcT2) + srcoffset2);
|
||||
int dst_index = mad24(y, dststep, x*sizeof(dstT) + dstoffset);
|
||||
|
||||
PROCESS_ELEM;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#elif defined UNARY_OP
|
||||
|
||||
__kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
|
||||
__global uchar* dstptr, int dststep, int dstoffset,
|
||||
int rows, int cols EXTRA_PARAMS )
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int src1_index = mad24(y, srcstep1, x*sizeof(srcT1) + srcoffset1);
|
||||
int dst_index = mad24(y, dststep, x*sizeof(dstT) + dstoffset);
|
||||
|
||||
PROCESS_ELEM;
|
||||
}
|
||||
}
|
||||
|
||||
#elif defined MASK_UNARY_OP
|
||||
|
||||
__kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
|
||||
__global const uchar* mask, int maskstep, int maskoffset,
|
||||
__global uchar* dstptr, int dststep, int dstoffset,
|
||||
int rows, int cols EXTRA_PARAMS )
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int mask_index = mad24(y, maskstep, x + maskoffset);
|
||||
if( mask[mask_index] )
|
||||
{
|
||||
int src1_index = mad24(y, srcstep1, x*sizeof(srcT1) + srcoffset1);
|
||||
int dst_index = mad24(y, dststep, x*sizeof(dstT) + dstoffset);
|
||||
|
||||
PROCESS_ELEM;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#error "Unknown operation type"
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
74
modules/core/src/opencl/copyset.cl
Normal file
74
modules/core/src/opencl/copyset.cl
Normal file
@@ -0,0 +1,74 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the copyright holders or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
__kernel void setMask(__global const uchar* mask, int maskstep, int maskoffset,
|
||||
__global uchar* dstptr, int dststep, int dstoffset,
|
||||
int rows, int cols, dstT value )
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int mask_index = mad24(y, maskstep, x + maskoffset);
|
||||
if( mask[mask_index] )
|
||||
{
|
||||
int dst_index = mad24(y, dststep, x*sizeof(dstT) + dstoffset);
|
||||
*(dstT*)(dstptr + dst_index) = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void set(__global uchar* dstptr, int dststep, int dstoffset,
|
||||
int rows, int cols, dstT value )
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int dst_index = mad24(y, dststep, x*sizeof(dstT) + dstoffset);
|
||||
*(dstT*)(dstptr + dst_index) = value;
|
||||
}
|
||||
}
|
||||
|
96
modules/core/src/opencl/mulspectrums.cl
Normal file
96
modules/core/src/opencl/mulspectrums.cl
Normal file
@@ -0,0 +1,96 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Peng Xiao, pengxiao@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other oclMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the uintel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business uinterruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
typedef float2 cfloat;
|
||||
inline cfloat cmulf(cfloat a, cfloat b)
|
||||
{
|
||||
return (cfloat)( a.x*b.x - a.y*b.y, a.x*b.y + a.y*b.x);
|
||||
}
|
||||
|
||||
inline cfloat conjf(cfloat a)
|
||||
{
|
||||
return (cfloat)( a.x, - a.y );
|
||||
}
|
||||
|
||||
__kernel void
|
||||
mulAndScaleSpectrumsKernel(
|
||||
__global const cfloat* a,
|
||||
__global const cfloat* b,
|
||||
float scale,
|
||||
__global cfloat* dst,
|
||||
uint cols,
|
||||
uint rows,
|
||||
uint mstep
|
||||
)
|
||||
{
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint idx = mad24(y, mstep / sizeof(cfloat), x);
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
cfloat v = cmulf(a[idx], b[idx]);
|
||||
dst[idx] = (cfloat)( v.x * scale, v.y * scale );
|
||||
}
|
||||
}
|
||||
__kernel void
|
||||
mulAndScaleSpectrumsKernel_CONJ(
|
||||
__global const cfloat* a,
|
||||
__global const cfloat* b,
|
||||
float scale,
|
||||
__global cfloat* dst,
|
||||
uint cols,
|
||||
uint rows,
|
||||
uint mstep
|
||||
)
|
||||
{
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint idx = mad24(y, mstep / sizeof(cfloat), x);
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
cfloat v = cmulf(a[idx], conjf(b[idx]));
|
||||
dst[idx] = (cfloat)( v.x * scale, v.y * scale );
|
||||
}
|
||||
}
|
73
modules/core/src/opencl/polarcart.cl
Normal file
73
modules/core/src/opencl/polarcart.cl
Normal file
@@ -0,0 +1,73 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the copyright holders or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
__kernel void polarToCart(__global const uchar* mask, int maskstep, int maskoffset,
|
||||
__global uchar* dstptr, int dststep, int dstoffset,
|
||||
int rows, int cols, dstT value )
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int mask_index = mad24(y, maskstep, x + maskoffset);
|
||||
if( mask[mask_index] )
|
||||
{
|
||||
int dst_index = mad24(y, dststep, x*sizeof(dstT) + dstoffset);
|
||||
*(dstT*)(dstptr + dst_index) = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void cartToPolar(__global uchar* dstptr, int dststep, int dstoffset,
|
||||
int rows, int cols, dstT value )
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
int dst_index = mad24(y, dststep, x*sizeof(dstT) + dstoffset);
|
||||
*(dstT*)(dstptr + dst_index) = value;
|
||||
}
|
||||
}
|
104
modules/core/src/opencl/reductions.cl
Normal file
104
modules/core/src/opencl/reductions.cl
Normal file
@@ -0,0 +1,104 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Shengen Yan,yanshengen@gmail.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if FUNC_SUM
|
||||
#define FUNC(a, b) b += a;
|
||||
#elif FUNC_ABS_SUM
|
||||
#define FUNC(a, b) b += a >= (dstT)(0) ? a : -a;
|
||||
#elif FUNC_SQR_SUM
|
||||
#define FUNC(a, b) b += a * a;
|
||||
#else
|
||||
#error No sum function
|
||||
#endif
|
||||
|
||||
/**************************************Array buffer SUM**************************************/
|
||||
|
||||
__kernel void arithm_op_sum(int cols,int invalid_cols,int offset,int elemnum,int groupnum,
|
||||
__global srcT *src, __global dstT *dst)
|
||||
{
|
||||
unsigned int lid = get_local_id(0);
|
||||
unsigned int gid = get_group_id(0);
|
||||
unsigned int id = get_global_id(0);
|
||||
unsigned int idx = offset + id + (id / cols) * invalid_cols;
|
||||
|
||||
__local dstT localmem_sum[128];
|
||||
dstT sum = (dstT)(0), temp;
|
||||
|
||||
for (int grainSize = groupnum << 8; id < elemnum; id += grainSize)
|
||||
{
|
||||
idx = offset + id + (id / cols) * invalid_cols;
|
||||
temp = convertToDstT(src[idx]);
|
||||
FUNC(temp, sum);
|
||||
}
|
||||
|
||||
if (lid > 127)
|
||||
localmem_sum[lid - 128] = sum;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (lid < 128)
|
||||
localmem_sum[lid] = sum + localmem_sum[lid];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
for (int lsize = 64; lsize > 0; lsize >>= 1)
|
||||
{
|
||||
if (lid < lsize)
|
||||
{
|
||||
int lid2 = lsize + lid;
|
||||
localmem_sum[lid] = localmem_sum[lid] + localmem_sum[lid2];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
if (lid == 0)
|
||||
dst[gid] = localmem_sum[0];
|
||||
}
|
Reference in New Issue
Block a user