Merge pull request #775 from bitwangyaoyao:2.4_fixerr
This commit is contained in:
commit
a2d27429e4
@ -44,7 +44,11 @@
|
||||
//M*/
|
||||
|
||||
// Double-precision setup: only active when DOUBLE_SUPPORT is defined
// (presumably passed in by the host as a build option -- confirm).
// cl_khr_fp64 is tried first; cl_amd_fp64 is enabled only when the
// cl_khr_fp64 macro is not defined. If neither macro is defined, no
// extension pragma is emitted.
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif // cl_khr_fp64 / cl_amd_fp64
|
||||
#endif // DOUBLE_SUPPORT
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -63,6 +67,9 @@ __kernel void arithm_absdiff_D0 (__global uchar *src1, int src1_step, int src1_o
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
||||
@ -111,6 +118,9 @@ __kernel void arithm_absdiff_D2 (__global ushort *src1, int src1_step, int src1_
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -145,6 +155,9 @@ __kernel void arithm_absdiff_D3 (__global short *src1, int src1_step, int src1_o
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -249,6 +262,9 @@ __kernel void arithm_s_absdiff_C1_D0 (__global uchar *src1, int src1_step, int
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
|
||||
@ -288,6 +304,9 @@ __kernel void arithm_s_absdiff_C1_D2 (__global ushort *src1, int src1_step, in
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
|
||||
@ -319,6 +338,9 @@ __kernel void arithm_s_absdiff_C1_D3 (__global short *src1, int src1_step, int
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
|
||||
@ -422,6 +444,9 @@ __kernel void arithm_s_absdiff_C2_D0 (__global uchar *src1, int src1_step, int
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
|
||||
@ -564,6 +589,9 @@ __kernel void arithm_s_absdiff_C3_D0 (__global uchar *src1, int src1_step, int
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
|
||||
|
||||
@ -618,6 +646,9 @@ __kernel void arithm_s_absdiff_C3_D2 (__global ushort *src1, int src1_step, in
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
|
||||
@ -668,6 +699,9 @@ __kernel void arithm_s_absdiff_C3_D3 (__global short *src1, int src1_step, int
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
|
||||
|
@ -45,7 +45,11 @@
|
||||
//M*/
|
||||
|
||||
// Double-precision setup: only active when DOUBLE_SUPPORT is defined
// (presumably passed in by the host as a build option -- confirm).
// cl_khr_fp64 is tried first; cl_amd_fp64 is enabled only when the
// cl_khr_fp64 macro is not defined. If neither macro is defined, no
// extension pragma is emitted.
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif // cl_khr_fp64 / cl_amd_fp64
|
||||
#endif // DOUBLE_SUPPORT
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -64,6 +68,9 @@ __kernel void arithm_add_D0 (__global uchar *src1, int src1_step, int src1_offse
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
||||
@ -112,6 +119,9 @@ __kernel void arithm_add_D2 (__global ushort *src1, int src1_step, int src1_offs
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -147,6 +157,9 @@ __kernel void arithm_add_D3 (__global short *src1, int src1_step, int src1_offse
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -252,6 +265,9 @@ __kernel void arithm_add_with_mask_C1_D0 (__global uchar *src1, int src1_step, i
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
||||
@ -311,6 +327,9 @@ __kernel void arithm_add_with_mask_C1_D2 (__global ushort *src1, int src1_step,
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -348,6 +367,9 @@ __kernel void arithm_add_with_mask_C1_D3 (__global short *src1, int src1_step, i
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -477,6 +499,9 @@ __kernel void arithm_add_with_mask_C2_D0 (__global uchar *src1, int src1_step, i
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -664,6 +689,9 @@ __kernel void arithm_add_with_mask_C3_D0 (__global uchar *src1, int src1_step, i
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
|
||||
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
|
||||
@ -724,6 +752,9 @@ __kernel void arithm_add_with_mask_C3_D2 (__global ushort *src1, int src1_step,
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
|
||||
@ -780,6 +811,9 @@ __kernel void arithm_add_with_mask_C3_D3 (__global short *src1, int src1_step, i
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
|
||||
|
@ -42,8 +42,12 @@
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
#if defined DOUBLE_SUPPORT
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif
|
||||
typedef double F;
|
||||
#else
|
||||
typedef float F;
|
||||
@ -65,6 +69,9 @@ __kernel void addWeighted_D0 (__global uchar *src1,int src1_step,int src1_offset
|
||||
{
|
||||
|
||||
x = x << 2;
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
||||
@ -122,6 +129,9 @@ __kernel void addWeighted_D2 (__global ushort *src1, int src1_step,int src1_offs
|
||||
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset -( dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset -( dst_align << 1));
|
||||
@ -182,6 +192,9 @@ __kernel void addWeighted_D3 (__global short *src1, int src1_step,int src1_offse
|
||||
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset -( dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset -( dst_align << 1));
|
||||
@ -243,6 +256,9 @@ __kernel void addWeighted_D4 (__global int *src1, int src1_step,int src1_offset,
|
||||
|
||||
#define bitOfInt (sizeof(int)== 4 ? 2: 3)
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> bitOfInt) & 3)
|
||||
|
||||
int src1_index = mad24(y, src1_step, (x << bitOfInt) + src1_offset - (dst_align << bitOfInt));
|
||||
@ -304,6 +320,9 @@ __kernel void addWeighted_D5 (__global float *src1,int src1_step,int src1_offset
|
||||
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 2) & 3)
|
||||
|
||||
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
|
||||
@ -366,6 +385,9 @@ __kernel void addWeighted_D6 (__global double *src1, int src1_step,int src1_offs
|
||||
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 3) & 3)
|
||||
|
||||
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
|
||||
|
@ -44,9 +44,13 @@
|
||||
//M*/
|
||||
|
||||
// Double-precision setup: only active when DOUBLE_SUPPORT is defined
// (presumably passed in by the host as a build option -- confirm).
// cl_khr_fp64 is tried first; cl_amd_fp64 is enabled only when the
// cl_khr_fp64 macro is not defined. If neither macro is defined, no
// extension pragma is emitted.
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif // cl_khr_fp64 / cl_amd_fp64
|
||||
|
||||
#endif // DOUBLE_SUPPORT
|
||||
/**************************************add with scalar without mask**************************************/
|
||||
__kernel void arithm_s_add_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
@ -59,6 +63,9 @@ __kernel void arithm_s_add_C1_D0 (__global uchar *src1, int src1_step, int src
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
|
||||
@ -99,6 +106,9 @@ __kernel void arithm_s_add_C1_D2 (__global ushort *src1, int src1_step, int sr
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
|
||||
@ -131,6 +141,9 @@ __kernel void arithm_s_add_C1_D3 (__global short *src1, int src1_step, int src
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
|
||||
@ -233,6 +246,9 @@ __kernel void arithm_s_add_C2_D0 (__global uchar *src1, int src1_step, int src
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
|
||||
@ -378,6 +394,9 @@ __kernel void arithm_s_add_C3_D0 (__global uchar *src1, int src1_step, int src
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
|
||||
|
||||
@ -432,6 +451,9 @@ __kernel void arithm_s_add_C3_D2 (__global ushort *src1, int src1_step, int sr
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
|
||||
@ -482,6 +504,9 @@ __kernel void arithm_s_add_C3_D3 (__global short *src1, int src1_step, int src
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
|
||||
|
@ -44,7 +44,11 @@
|
||||
//M*/
|
||||
|
||||
// Double-precision setup: only active when DOUBLE_SUPPORT is defined
// (presumably passed in by the host as a build option -- confirm).
// cl_khr_fp64 is tried first; cl_amd_fp64 is enabled only when the
// cl_khr_fp64 macro is not defined. If neither macro is defined, no
// extension pragma is emitted.
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif // cl_khr_fp64 / cl_amd_fp64
|
||||
#endif // DOUBLE_SUPPORT
|
||||
|
||||
/**************************************add with scalar with mask**************************************/
|
||||
@ -61,6 +65,9 @@ __kernel void arithm_s_add_with_mask_C1_D0 (__global uchar *src1, int src1_ste
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -111,6 +118,9 @@ __kernel void arithm_s_add_with_mask_C1_D2 (__global ushort *src1, int src1_st
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -146,6 +156,9 @@ __kernel void arithm_s_add_with_mask_C1_D3 (__global short *src1, int src1_ste
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -267,6 +280,9 @@ __kernel void arithm_s_add_with_mask_C2_D0 (__global uchar *src1, int src1_ste
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -443,6 +459,9 @@ __kernel void arithm_s_add_with_mask_C3_D0 (__global uchar *src1, int src1_ste
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -501,6 +520,9 @@ __kernel void arithm_s_add_with_mask_C3_D2 (__global ushort *src1, int src1_st
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -555,6 +577,9 @@ __kernel void arithm_s_add_with_mask_C3_D3 (__global short *src1, int src1_ste
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
|
@ -43,7 +43,11 @@
|
||||
//
|
||||
//M*/
|
||||
// Double-precision setup: only active when DOUBLE_SUPPORT is defined
// (presumably passed in by the host as a build option -- confirm).
// cl_khr_fp64 is tried first; cl_amd_fp64 is enabled only when the
// cl_khr_fp64 macro is not defined. If neither macro is defined, no
// extension pragma is emitted.
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif // cl_khr_fp64 / cl_amd_fp64
|
||||
#endif // DOUBLE_SUPPORT
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -62,6 +66,9 @@ __kernel void arithm_bitwise_and_D0 (__global uchar *src1, int src1_step, int sr
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
||||
@ -112,6 +119,9 @@ __kernel void arithm_bitwise_and_D1 (__global char *src1, int src1_step, int src
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
||||
@ -163,6 +173,9 @@ __kernel void arithm_bitwise_and_D2 (__global ushort *src1, int src1_step, int s
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -215,6 +228,9 @@ __kernel void arithm_bitwise_and_D3 (__global short *src1, int src1_step, int sr
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
|
@ -43,14 +43,18 @@
|
||||
//
|
||||
//M*/
|
||||
// Double-precision setup: only active when DOUBLE_SUPPORT is defined
// (presumably passed in by the host as a build option -- confirm).
// cl_khr_fp64 is tried first; cl_amd_fp64 is enabled only when the
// cl_khr_fp64 macro is not defined. If neither macro is defined, no
// extension pragma is emitted.
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif // cl_khr_fp64 / cl_amd_fp64
|
||||
#endif // DOUBLE_SUPPORT
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////BITWISE_AND////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/**************************************bitwise_and with mask**************************************/
|
||||
__kernel void arithm_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C1_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
@ -64,6 +68,9 @@ __kernel void arithm_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int src1
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
||||
@ -91,7 +98,8 @@ __kernel void arithm_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int src1
|
||||
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_and_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C1_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -105,6 +113,9 @@ __kernel void arithm_bitwise_and_with_mask_C1_D1 (__global char *src1, int src1_
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
||||
@ -132,7 +143,8 @@ __kernel void arithm_bitwise_and_with_mask_C1_D1 (__global char *src1, int src1_
|
||||
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C1_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
@ -146,6 +158,9 @@ __kernel void arithm_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int src
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -171,7 +186,8 @@ __kernel void arithm_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int src
|
||||
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_and_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C1_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
@ -185,6 +201,9 @@ __kernel void arithm_bitwise_and_with_mask_C1_D3 (__global short *src1, int src1
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -210,7 +229,8 @@ __kernel void arithm_bitwise_and_with_mask_C1_D3 (__global short *src1, int src1
|
||||
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_and_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C1_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
@ -242,7 +262,8 @@ __kernel void arithm_bitwise_and_with_mask_C1_D4 (__global int *src1, int src1
|
||||
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_and_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C1_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -274,8 +295,8 @@ __kernel void arithm_bitwise_and_with_mask_C1_D5 (__global char *src1, int src1_
|
||||
|
||||
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_bitwise_and_with_mask_C1_D6 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C1_D6 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -305,11 +326,11 @@ __kernel void arithm_bitwise_and_with_mask_C1_D6 (__global char *src1, int src1_
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C2_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
@ -323,6 +344,9 @@ __kernel void arithm_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int src1
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -347,7 +371,8 @@ __kernel void arithm_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int src1
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_and_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C2_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -361,6 +386,9 @@ __kernel void arithm_bitwise_and_with_mask_C2_D1 (__global char *src1, int src1_
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -384,7 +412,8 @@ __kernel void arithm_bitwise_and_with_mask_C2_D1 (__global char *src1, int src1_
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_bitwise_and_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C2_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
@ -413,7 +442,8 @@ __kernel void arithm_bitwise_and_with_mask_C2_D2 (__global ushort *src1, int src
|
||||
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_and_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C2_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
@ -442,7 +472,8 @@ __kernel void arithm_bitwise_and_with_mask_C2_D3 (__global short *src1, int src1
|
||||
*((__global short2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_and_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C2_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
@ -471,7 +502,8 @@ __kernel void arithm_bitwise_and_with_mask_C2_D4 (__global int *src1, int src1
|
||||
*((__global int2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_and_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C2_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -500,8 +532,9 @@ __kernel void arithm_bitwise_and_with_mask_C2_D5 (__global char *src1, int src1_
|
||||
*((__global char8 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_bitwise_and_with_mask_C2_D6 (__global char *src1, int src1_step, int src1_offset,
|
||||
|
||||
__kernel void arithm_bitwise_and_with_mask_C2_D6 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -530,11 +563,11 @@ __kernel void arithm_bitwise_and_with_mask_C2_D6 (__global char *src1, int src1_
|
||||
*((__global char16 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C3_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
@ -548,6 +581,9 @@ __kernel void arithm_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int src1
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
|
||||
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
|
||||
@ -596,7 +632,8 @@ __kernel void arithm_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int src1
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_and_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C3_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -610,6 +647,9 @@ __kernel void arithm_bitwise_and_with_mask_C3_D1 (__global char *src1, int src1_
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
|
||||
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
|
||||
@ -657,7 +697,8 @@ __kernel void arithm_bitwise_and_with_mask_C3_D1 (__global char *src1, int src1_
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C3_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
@ -671,6 +712,9 @@ __kernel void arithm_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int src
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
|
||||
@ -713,7 +757,8 @@ __kernel void arithm_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int src
|
||||
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_and_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C3_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
@ -727,6 +772,9 @@ __kernel void arithm_bitwise_and_with_mask_C3_D3 (__global short *src1, int src1
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
|
||||
@ -769,7 +817,8 @@ __kernel void arithm_bitwise_and_with_mask_C3_D3 (__global short *src1, int src1
|
||||
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_and_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C3_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
@ -813,7 +862,8 @@ __kernel void arithm_bitwise_and_with_mask_C3_D4 (__global int *src1, int src1
|
||||
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_and_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C3_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -858,7 +908,8 @@ __kernel void arithm_bitwise_and_with_mask_C3_D5 (__global char *src1, int src1_
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_bitwise_and_with_mask_C3_D6 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C3_D6 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -905,8 +956,8 @@ __kernel void arithm_bitwise_and_with_mask_C3_D6 (__global char *src1, int src1_
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_and_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C4_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
@ -937,7 +988,8 @@ __kernel void arithm_bitwise_and_with_mask_C4_D0 (__global uchar *src1, int src1
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_and_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C4_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -967,7 +1019,8 @@ __kernel void arithm_bitwise_and_with_mask_C4_D1 (__global char *src1, int src1_
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_bitwise_and_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C4_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
@ -996,7 +1049,8 @@ __kernel void arithm_bitwise_and_with_mask_C4_D2 (__global ushort *src1, int src
|
||||
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_and_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C4_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
@ -1025,7 +1079,8 @@ __kernel void arithm_bitwise_and_with_mask_C4_D3 (__global short *src1, int src1
|
||||
*((__global short4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_and_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C4_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
@ -1054,7 +1109,8 @@ __kernel void arithm_bitwise_and_with_mask_C4_D4 (__global int *src1, int src1
|
||||
*((__global int4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_and_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C4_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -1084,7 +1140,8 @@ __kernel void arithm_bitwise_and_with_mask_C4_D5 (__global char *src1, int src1_
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_bitwise_and_with_mask_C4_D6 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_and_with_mask_C4_D6 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
|
@ -42,17 +42,20 @@
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//
|
||||
#if defined (__ATI__)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (__NVIDIA__)
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////BITWISE_AND////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/**************************************and with scalar without mask**************************************/
|
||||
__kernel void arithm_s_bitwise_and_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C1_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
uchar4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -63,6 +66,9 @@ __kernel void arithm_s_bitwise_and_C1_D0 (__global uchar *src1, int src1_step,
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
|
||||
@ -86,7 +92,8 @@ __kernel void arithm_s_bitwise_and_C1_D0 (__global uchar *src1, int src1_step,
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_s_bitwise_and_C1_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C1_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
char4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -97,6 +104,9 @@ __kernel void arithm_s_bitwise_and_C1_D1 (__global char *src1, int src1_step,
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
|
||||
@ -119,7 +129,8 @@ __kernel void arithm_s_bitwise_and_C1_D1 (__global char *src1, int src1_step,
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_and_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C1_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
ushort4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -131,6 +142,9 @@ __kernel void arithm_s_bitwise_and_C1_D2 (__global ushort *src1, int src1_step
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
|
||||
@ -150,7 +164,8 @@ __kernel void arithm_s_bitwise_and_C1_D2 (__global ushort *src1, int src1_step
|
||||
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_and_C1_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C1_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
short4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -162,6 +177,9 @@ __kernel void arithm_s_bitwise_and_C1_D3 (__global short *src1, int src1_step,
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
|
||||
@ -181,7 +199,8 @@ __kernel void arithm_s_bitwise_and_C1_D3 (__global short *src1, int src1_step,
|
||||
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_and_C1_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C1_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
int4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -202,7 +221,8 @@ __kernel void arithm_s_bitwise_and_C1_D4 (__global int *src1, int src1_step, i
|
||||
*((__global int *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_and_C1_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C1_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
char16 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -232,9 +252,9 @@ __kernel void arithm_s_bitwise_and_C1_D5 (__global char *src1, int src1_step,
|
||||
*((__global char4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_s_bitwise_and_C1_D6 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C1_D6 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
short16 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -256,7 +276,8 @@ __kernel void arithm_s_bitwise_and_C1_D6 (__global short *src1, int src1_step, i
|
||||
}
|
||||
}
|
||||
#endif
|
||||
__kernel void arithm_s_bitwise_and_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C2_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
uchar4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -268,6 +289,9 @@ __kernel void arithm_s_bitwise_and_C2_D0 (__global uchar *src1, int src1_step,
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
|
||||
@ -290,7 +314,8 @@ __kernel void arithm_s_bitwise_and_C2_D0 (__global uchar *src1, int src1_step,
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_s_bitwise_and_C2_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C2_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
char4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -302,6 +327,9 @@ __kernel void arithm_s_bitwise_and_C2_D1 (__global char *src1, int src1_step,
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
|
||||
@ -322,7 +350,8 @@ __kernel void arithm_s_bitwise_and_C2_D1 (__global char *src1, int src1_step,
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_and_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C2_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
ushort4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -343,7 +372,8 @@ __kernel void arithm_s_bitwise_and_C2_D2 (__global ushort *src1, int src1_step
|
||||
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_and_C2_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C2_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
short4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -364,7 +394,8 @@ __kernel void arithm_s_bitwise_and_C2_D3 (__global short *src1, int src1_step,
|
||||
*((__global short2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_and_C2_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C2_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
int4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -384,7 +415,8 @@ __kernel void arithm_s_bitwise_and_C2_D4 (__global int *src1, int src1_step, i
|
||||
*((__global int2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_and_C2_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C2_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
char16 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -406,7 +438,8 @@ __kernel void arithm_s_bitwise_and_C2_D5 (__global char *src1, int src1_step,
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_s_bitwise_and_C2_D6 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C2_D6 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
short16 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -428,7 +461,8 @@ __kernel void arithm_s_bitwise_and_C2_D6 (__global short *src1, int src1_step, i
|
||||
}
|
||||
}
|
||||
#endif
|
||||
__kernel void arithm_s_bitwise_and_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C3_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
uchar4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -440,6 +474,9 @@ __kernel void arithm_s_bitwise_and_C3_D0 (__global uchar *src1, int src1_step,
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
|
||||
|
||||
@ -484,7 +521,8 @@ __kernel void arithm_s_bitwise_and_C3_D0 (__global uchar *src1, int src1_step,
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_s_bitwise_and_C3_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C3_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
char4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -496,6 +534,9 @@ __kernel void arithm_s_bitwise_and_C3_D1 (__global char *src1, int src1_step,
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
|
||||
|
||||
@ -539,7 +580,8 @@ __kernel void arithm_s_bitwise_and_C3_D1 (__global char *src1, int src1_step,
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_and_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C3_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
ushort4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -551,6 +593,9 @@ __kernel void arithm_s_bitwise_and_C3_D2 (__global ushort *src1, int src1_step
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
|
||||
@ -589,7 +634,8 @@ __kernel void arithm_s_bitwise_and_C3_D2 (__global ushort *src1, int src1_step
|
||||
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_and_C3_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C3_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
short4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -601,6 +647,9 @@ __kernel void arithm_s_bitwise_and_C3_D3 (__global short *src1, int src1_step,
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
|
||||
@ -639,7 +688,8 @@ __kernel void arithm_s_bitwise_and_C3_D3 (__global short *src1, int src1_step,
|
||||
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_and_C3_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C3_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
int4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -673,7 +723,8 @@ __kernel void arithm_s_bitwise_and_C3_D4 (__global int *src1, int src1_step, i
|
||||
*((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_and_C3_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C3_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
char16 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -708,7 +759,8 @@ __kernel void arithm_s_bitwise_and_C3_D5 (__global char *src1, int src1_step,
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_s_bitwise_and_C3_D6 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C3_D6 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
short16 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -743,7 +795,8 @@ __kernel void arithm_s_bitwise_and_C3_D6 (__global short *src1, int src1_step, i
|
||||
}
|
||||
}
|
||||
#endif
|
||||
__kernel void arithm_s_bitwise_and_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C4_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
uchar4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -765,7 +818,8 @@ __kernel void arithm_s_bitwise_and_C4_D0 (__global uchar *src1, int src1_step,
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_s_bitwise_and_C4_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C4_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
char4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -786,7 +840,8 @@ __kernel void arithm_s_bitwise_and_C4_D1 (__global char *src1, int src1_step,
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_and_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C4_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
ushort4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -806,7 +861,8 @@ __kernel void arithm_s_bitwise_and_C4_D2 (__global ushort *src1, int src1_step
|
||||
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_and_C4_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C4_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
short4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -826,7 +882,8 @@ __kernel void arithm_s_bitwise_and_C4_D3 (__global short *src1, int src1_step,
|
||||
*((__global short4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_and_C4_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C4_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
int4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -846,7 +903,8 @@ __kernel void arithm_s_bitwise_and_C4_D4 (__global int *src1, int src1_step, i
|
||||
*((__global int4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_and_C4_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C4_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
char16 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -869,7 +927,8 @@ __kernel void arithm_s_bitwise_and_C4_D5 (__global char *src1, int src1_step,
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_s_bitwise_and_C4_D6 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_C4_D6 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
short16 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
|
@ -42,17 +42,19 @@
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
#if defined (__ATI__)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (__NVIDIA__)
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////BITWISE_AND////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/**************************************bitwise_and with scalar with mask**************************************/
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C1_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
uchar4 src2, int rows, int cols, int dst_step1)
|
||||
@ -65,6 +67,9 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -90,7 +95,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C1_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
char4 src2, int rows, int cols, int dst_step1)
|
||||
@ -103,6 +109,9 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D1 (__global char *src1, int s
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -127,7 +136,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D1 (__global char *src1, int s
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C1_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
ushort4 src2, int rows, int cols, int dst_step1)
|
||||
@ -140,6 +150,9 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -161,7 +174,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int
|
||||
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C1_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
short4 src2, int rows, int cols, int dst_step1)
|
||||
@ -174,6 +188,9 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D3 (__global short *src1, int
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -195,7 +212,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D3 (__global short *src1, int
|
||||
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C1_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
int4 src2, int rows, int cols, int dst_step1)
|
||||
@ -223,7 +241,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D4 (__global int *src1, int
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C1_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
char16 src2, int rows, int cols, int dst_step1)
|
||||
@ -252,7 +271,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D5 (__global char *src1, int src
|
||||
}
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C1_D6 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C1_D6 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
short16 src2, int rows, int cols, int dst_step1)
|
||||
@ -280,7 +300,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D6 (__global short *src1, int sr
|
||||
}
|
||||
}
|
||||
#endif
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C2_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
uchar4 src2, int rows, int cols, int dst_step1)
|
||||
@ -293,6 +314,9 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -316,7 +340,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C2_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
char4 src2, int rows, int cols, int dst_step1)
|
||||
@ -329,6 +354,9 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D1 (__global char *src1, int s
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -351,7 +379,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D1 (__global char *src1, int s
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C2_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
ushort4 src2, int rows, int cols, int dst_step1)
|
||||
@ -378,7 +407,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D2 (__global ushort *src1, int
|
||||
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C2_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
short4 src2, int rows, int cols, int dst_step1)
|
||||
@ -405,7 +435,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D3 (__global short *src1, int
|
||||
*((__global short2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C2_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
int4 src2, int rows, int cols, int dst_step1)
|
||||
@ -432,7 +463,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D4 (__global int *src1, int sr
|
||||
*((__global int2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C2_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
char16 src2, int rows, int cols, int dst_step1)
|
||||
@ -461,7 +493,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D5 (__global char *src1, int s
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C2_D6 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C2_D6 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
short16 src2, int rows, int cols, int dst_step1)
|
||||
@ -489,7 +522,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D6 (__global short *src1, int sr
|
||||
}
|
||||
}
|
||||
#endif
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C3_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
uchar4 src2, int rows, int cols, int dst_step1)
|
||||
@ -502,6 +536,9 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -549,7 +586,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C3_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
char4 src2, int rows, int cols, int dst_step1)
|
||||
@ -562,6 +600,9 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D1 (__global char *src1, int s
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -608,7 +649,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D1 (__global char *src1, int s
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C3_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
ushort4 src2, int rows, int cols, int dst_step1)
|
||||
@ -621,6 +663,9 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -662,7 +707,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int
|
||||
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C3_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
short4 src2, int rows, int cols, int dst_step1)
|
||||
@ -675,6 +721,9 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D3 (__global short *src1, int
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -716,7 +765,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D3 (__global short *src1, int
|
||||
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C3_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
int4 src2, int rows, int cols, int dst_step1)
|
||||
@ -758,7 +808,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D4 (__global int *src1, int sr
|
||||
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C3_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
char16 src2, int rows, int cols, int dst_step1)
|
||||
@ -801,7 +852,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D5 (__global char *src1, int s
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C3_D6 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C3_D6 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
short16 src2, int rows, int cols, int dst_step1)
|
||||
@ -844,7 +896,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D6 (__global short *src1, int sr
|
||||
}
|
||||
}
|
||||
#endif
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C4_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
uchar4 src2, int rows, int cols, int dst_step1)
|
||||
@ -872,7 +925,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D0 (__global uchar *src1, int
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C4_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
char4 src2, int rows, int cols, int dst_step1)
|
||||
@ -899,7 +953,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D1 (__global char *src1, int s
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C4_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
ushort4 src2, int rows, int cols, int dst_step1)
|
||||
@ -925,7 +980,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D2 (__global ushort *src1, int
|
||||
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C4_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
short4 src2, int rows, int cols, int dst_step1)
|
||||
@ -951,7 +1007,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D3 (__global short *src1, int
|
||||
*((__global short4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C4_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
int4 src2, int rows, int cols, int dst_step1)
|
||||
@ -977,7 +1034,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D4 (__global int *src1, int sr
|
||||
*((__global int4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C4_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
char16 src2, int rows, int cols, int dst_step1)
|
||||
@ -1006,7 +1064,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D5 (__global char *src1, int s
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C4_D6 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_and_with_mask_C4_D6 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
short16 src2, int rows, int cols, int dst_step1)
|
||||
@ -1055,3 +1114,4 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D6 (__global short *src1, int sr
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -43,9 +43,12 @@
|
||||
//
|
||||
//M*/
|
||||
// Double-precision setup, active only when DOUBLE_SUPPORT is defined:
// enable cl_khr_fp64 if that macro is defined, otherwise fall back to
// cl_amd_fp64 if it is defined. When neither macro is defined, no
// extension pragma is emitted.
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////BITWISE_NOT////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -61,6 +64,9 @@ __kernel void arithm_bitwise_not_D0 (__global uchar *src1, int src1_step, int sr
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
|
||||
@ -101,6 +107,9 @@ __kernel void arithm_bitwise_not_D1 (__global char *src1, int src1_step, int src
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
|
||||
@ -135,6 +144,9 @@ __kernel void arithm_bitwise_not_D2 (__global ushort *src1, int src1_step, int s
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
|
||||
@ -170,6 +182,9 @@ __kernel void arithm_bitwise_not_D3 (__global short *src1, int src1_step, int sr
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
|
||||
|
@ -43,7 +43,11 @@
|
||||
//
|
||||
//M*/
|
||||
// Only when DOUBLE_SUPPORT is defined: pick the fp64 extension to enable.
// cl_khr_fp64 is tried first; cl_amd_fp64 is used only if the former macro
// is not defined. If neither is defined, nothing is enabled here.
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -62,6 +66,9 @@ __kernel void arithm_bitwise_or_D0 (__global uchar *src1, int src1_step, int src
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
||||
@ -110,6 +117,9 @@ __kernel void arithm_bitwise_or_D1 (__global char *src1, int src1_step, int src1
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
||||
@ -147,6 +157,9 @@ __kernel void arithm_bitwise_or_D2 (__global ushort *src1, int src1_step, int sr
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -185,6 +198,9 @@ __kernel void arithm_bitwise_or_D3 (__global short *src1, int src1_step, int src
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
|
@ -43,14 +43,18 @@
|
||||
//
|
||||
//M*/
|
||||
// Guarded by DOUBLE_SUPPORT: enable a 64-bit floating-point extension.
// The cl_khr_fp64 pragma is used when that macro is defined; otherwise the
// cl_amd_fp64 pragma is used when its macro is defined. With neither macro
// defined, no pragma is emitted.
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////BITWISE_OR////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/**************************************bitwise_or with mask**************************************/
|
||||
__kernel void arithm_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C1_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
@ -64,6 +68,9 @@ __kernel void arithm_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int src1_
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
||||
@ -91,7 +98,8 @@ __kernel void arithm_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int src1_
|
||||
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_or_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C1_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -105,6 +113,9 @@ __kernel void arithm_bitwise_or_with_mask_C1_D1 (__global char *src1, int src1_s
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
||||
@ -132,7 +143,8 @@ __kernel void arithm_bitwise_or_with_mask_C1_D1 (__global char *src1, int src1_s
|
||||
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C1_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
@ -146,6 +158,9 @@ __kernel void arithm_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int src1
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -171,7 +186,8 @@ __kernel void arithm_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int src1
|
||||
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_or_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C1_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
@ -185,6 +201,9 @@ __kernel void arithm_bitwise_or_with_mask_C1_D3 (__global short *src1, int src1_
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -210,7 +229,8 @@ __kernel void arithm_bitwise_or_with_mask_C1_D3 (__global short *src1, int src1_
|
||||
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_or_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C1_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
@ -242,7 +262,8 @@ __kernel void arithm_bitwise_or_with_mask_C1_D4 (__global int *src1, int src1_
|
||||
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_or_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C1_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -273,9 +294,9 @@ __kernel void arithm_bitwise_or_with_mask_C1_D5 (__global char *src1, int src1_s
|
||||
}
|
||||
|
||||
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_bitwise_or_with_mask_C1_D6 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C1_D6 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -308,8 +329,8 @@ __kernel void arithm_bitwise_or_with_mask_C1_D6 (__global char *src1, int src1_s
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C2_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
@ -323,6 +344,9 @@ __kernel void arithm_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int src1_
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -347,7 +371,8 @@ __kernel void arithm_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int src1_
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_or_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C2_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -361,6 +386,9 @@ __kernel void arithm_bitwise_or_with_mask_C2_D1 (__global char *src1, int src1_s
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -384,7 +412,8 @@ __kernel void arithm_bitwise_or_with_mask_C2_D1 (__global char *src1, int src1_s
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_bitwise_or_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C2_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
@ -413,7 +442,8 @@ __kernel void arithm_bitwise_or_with_mask_C2_D2 (__global ushort *src1, int src1
|
||||
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_or_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C2_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
@ -442,7 +472,8 @@ __kernel void arithm_bitwise_or_with_mask_C2_D3 (__global short *src1, int src1_
|
||||
*((__global short2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_or_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C2_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
@ -471,7 +502,8 @@ __kernel void arithm_bitwise_or_with_mask_C2_D4 (__global int *src1, int src1_
|
||||
*((__global int2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_or_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C2_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -501,7 +533,8 @@ __kernel void arithm_bitwise_or_with_mask_C2_D5 (__global char *src1, int src1_s
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_bitwise_or_with_mask_C2_D6 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C2_D6 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -533,8 +566,8 @@ __kernel void arithm_bitwise_or_with_mask_C2_D6 (__global char *src1, int src1_s
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C3_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
@ -548,6 +581,9 @@ __kernel void arithm_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int src1_
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
|
||||
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
|
||||
@ -596,7 +632,8 @@ __kernel void arithm_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int src1_
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_or_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C3_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -610,6 +647,9 @@ __kernel void arithm_bitwise_or_with_mask_C3_D1 (__global char *src1, int src1_s
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
|
||||
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
|
||||
@ -657,7 +697,8 @@ __kernel void arithm_bitwise_or_with_mask_C3_D1 (__global char *src1, int src1_s
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C3_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
@ -671,6 +712,9 @@ __kernel void arithm_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int src1
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
|
||||
@ -713,7 +757,8 @@ __kernel void arithm_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int src1
|
||||
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_or_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C3_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
@ -727,6 +772,9 @@ __kernel void arithm_bitwise_or_with_mask_C3_D3 (__global short *src1, int src1_
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
|
||||
@ -769,7 +817,8 @@ __kernel void arithm_bitwise_or_with_mask_C3_D3 (__global short *src1, int src1_
|
||||
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_or_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C3_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
@ -813,7 +862,8 @@ __kernel void arithm_bitwise_or_with_mask_C3_D4 (__global int *src1, int src1_
|
||||
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_or_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C3_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -858,7 +908,8 @@ __kernel void arithm_bitwise_or_with_mask_C3_D5 (__global char *src1, int src1_s
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_bitwise_or_with_mask_C3_D6 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C3_D6 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -905,8 +956,8 @@ __kernel void arithm_bitwise_or_with_mask_C3_D6 (__global char *src1, int src1_s
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_or_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C4_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
@ -937,7 +988,8 @@ __kernel void arithm_bitwise_or_with_mask_C4_D0 (__global uchar *src1, int src1_
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_or_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C4_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -967,7 +1019,8 @@ __kernel void arithm_bitwise_or_with_mask_C4_D1 (__global char *src1, int src1_s
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_bitwise_or_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C4_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
@ -996,7 +1049,8 @@ __kernel void arithm_bitwise_or_with_mask_C4_D2 (__global ushort *src1, int src1
|
||||
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_or_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C4_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
@ -1025,7 +1079,8 @@ __kernel void arithm_bitwise_or_with_mask_C4_D3 (__global short *src1, int src1_
|
||||
*((__global short4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_or_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C4_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
@ -1054,7 +1109,8 @@ __kernel void arithm_bitwise_or_with_mask_C4_D4 (__global int *src1, int src1_
|
||||
*((__global int4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_or_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C4_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -1084,7 +1140,8 @@ __kernel void arithm_bitwise_or_with_mask_C4_D5 (__global char *src1, int src1_s
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_bitwise_or_with_mask_C4_D6 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_or_with_mask_C4_D6 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
|
@ -43,14 +43,19 @@
|
||||
//
|
||||
//M*/
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////BITWISE_OR////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/**************************************bitwise_or with scalar without mask**************************************/
|
||||
__kernel void arithm_s_bitwise_or_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C1_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
uchar4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -61,6 +66,9 @@ __kernel void arithm_s_bitwise_or_C1_D0 (__global uchar *src1, int src1_step,
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
|
||||
@ -84,7 +92,8 @@ __kernel void arithm_s_bitwise_or_C1_D0 (__global uchar *src1, int src1_step,
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_s_bitwise_or_C1_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C1_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
char4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -95,6 +104,9 @@ __kernel void arithm_s_bitwise_or_C1_D1 (__global char *src1, int src1_step, i
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
|
||||
@ -117,7 +129,8 @@ __kernel void arithm_s_bitwise_or_C1_D1 (__global char *src1, int src1_step, i
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_or_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C1_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
ushort4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -129,6 +142,9 @@ __kernel void arithm_s_bitwise_or_C1_D2 (__global ushort *src1, int src1_step,
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
|
||||
@ -148,7 +164,8 @@ __kernel void arithm_s_bitwise_or_C1_D2 (__global ushort *src1, int src1_step,
|
||||
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_or_C1_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C1_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
short4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -160,6 +177,9 @@ __kernel void arithm_s_bitwise_or_C1_D3 (__global short *src1, int src1_step,
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
|
||||
@ -179,7 +199,8 @@ __kernel void arithm_s_bitwise_or_C1_D3 (__global short *src1, int src1_step,
|
||||
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_or_C1_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C1_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
int4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -200,7 +221,8 @@ __kernel void arithm_s_bitwise_or_C1_D4 (__global int *src1, int src1_step, in
|
||||
*((__global int *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_or_C1_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C1_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
char16 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -222,7 +244,8 @@ __kernel void arithm_s_bitwise_or_C1_D5 (__global char *src1, int src1_step, i
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_s_bitwise_or_C1_D6 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C1_D6 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
short16 src2, int rows, int cols, int dst_step1)
|
||||
|
||||
@ -245,8 +268,8 @@ __kernel void arithm_s_bitwise_or_C1_D6 (__global short *src1, int src1_step, in
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
__kernel void arithm_s_bitwise_or_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C2_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
uchar4 src2, int rows, int cols, int dst_step1)
|
||||
|
||||
@ -259,6 +282,9 @@ __kernel void arithm_s_bitwise_or_C2_D0 (__global uchar *src1, int src1_step,
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
|
||||
@ -280,7 +306,8 @@ __kernel void arithm_s_bitwise_or_C2_D0 (__global uchar *src1, int src1_step,
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_s_bitwise_or_C2_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C2_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
char4 src2, int rows, int cols, int dst_step1)
|
||||
|
||||
@ -293,6 +320,9 @@ __kernel void arithm_s_bitwise_or_C2_D1 (__global char *src1, int src1_step, i
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
|
||||
@ -313,7 +343,8 @@ __kernel void arithm_s_bitwise_or_C2_D1 (__global char *src1, int src1_step, i
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_or_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C2_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
ushort4 src2, int rows, int cols, int dst_step1)
|
||||
|
||||
@ -335,7 +366,8 @@ __kernel void arithm_s_bitwise_or_C2_D2 (__global ushort *src1, int src1_step,
|
||||
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_or_C2_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C2_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
short4 src2, int rows, int cols, int dst_step1)
|
||||
|
||||
@ -378,7 +410,8 @@ __kernel void arithm_s_bitwise_or_C2_D4 (__global int *src1, int src1_step, in
|
||||
*((__global int2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_or_C2_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C2_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
char16 src2, int rows, int cols, int dst_step1)
|
||||
|
||||
@ -400,7 +433,8 @@ __kernel void arithm_s_bitwise_or_C2_D5 (__global char *src1, int src1_step, i
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_s_bitwise_or_C2_D6 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C2_D6 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
short16 src2, int rows, int cols, int dst_step1)
|
||||
|
||||
@ -423,7 +457,8 @@ __kernel void arithm_s_bitwise_or_C2_D6 (__global short *src1, int src1_step, in
|
||||
}
|
||||
}
|
||||
#endif
|
||||
__kernel void arithm_s_bitwise_or_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C3_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
uchar4 src2, int rows, int cols, int dst_step1)
|
||||
|
||||
@ -436,6 +471,9 @@ __kernel void arithm_s_bitwise_or_C3_D0 (__global uchar *src1, int src1_step,
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
|
||||
|
||||
@ -480,7 +518,8 @@ __kernel void arithm_s_bitwise_or_C3_D0 (__global uchar *src1, int src1_step,
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_s_bitwise_or_C3_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C3_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
char4 src2, int rows, int cols, int dst_step1)
|
||||
|
||||
@ -493,6 +532,9 @@ __kernel void arithm_s_bitwise_or_C3_D1 (__global char *src1, int src1_step, i
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
|
||||
|
||||
@ -536,7 +578,8 @@ __kernel void arithm_s_bitwise_or_C3_D1 (__global char *src1, int src1_step, i
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_or_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C3_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
ushort4 src2, int rows, int cols, int dst_step1)
|
||||
|
||||
@ -549,6 +592,9 @@ __kernel void arithm_s_bitwise_or_C3_D2 (__global ushort *src1, int src1_step,
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
|
||||
@ -587,7 +633,8 @@ __kernel void arithm_s_bitwise_or_C3_D2 (__global ushort *src1, int src1_step,
|
||||
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_or_C3_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C3_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
short4 src2, int rows, int cols, int dst_step1)
|
||||
|
||||
@ -600,6 +647,9 @@ __kernel void arithm_s_bitwise_or_C3_D3 (__global short *src1, int src1_step,
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
|
||||
@ -638,7 +688,8 @@ __kernel void arithm_s_bitwise_or_C3_D3 (__global short *src1, int src1_step,
|
||||
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_or_C3_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C3_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
int4 src2, int rows, int cols, int dst_step1)
|
||||
|
||||
@ -673,7 +724,8 @@ __kernel void arithm_s_bitwise_or_C3_D4 (__global int *src1, int src1_step, in
|
||||
*((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_or_C3_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C3_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
char16 src2, int rows, int cols, int dst_step1)
|
||||
|
||||
@ -706,7 +758,8 @@ __kernel void arithm_s_bitwise_or_C3_D5 (__global char *src1, int src1_step, i
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_s_bitwise_or_C3_D6 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C3_D6 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
short16 src2, int rows, int cols, int dst_step1)
|
||||
|
||||
@ -742,7 +795,8 @@ __kernel void arithm_s_bitwise_or_C3_D6 (__global short *src1, int src1_step, in
|
||||
}
|
||||
}
|
||||
#endif
|
||||
__kernel void arithm_s_bitwise_or_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C4_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
uchar4 src2, int rows, int cols, int dst_step1)
|
||||
|
||||
@ -765,7 +819,8 @@ __kernel void arithm_s_bitwise_or_C4_D0 (__global uchar *src1, int src1_step,
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_s_bitwise_or_C4_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C4_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
char4 src2, int rows, int cols, int dst_step1)
|
||||
|
||||
@ -787,7 +842,8 @@ __kernel void arithm_s_bitwise_or_C4_D1 (__global char *src1, int src1_step, i
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_or_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C4_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
ushort4 src2, int rows, int cols, int dst_step1)
|
||||
|
||||
@ -808,7 +864,8 @@ __kernel void arithm_s_bitwise_or_C4_D2 (__global ushort *src1, int src1_step,
|
||||
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_or_C4_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C4_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
short4 src2, int rows, int cols, int dst_step1)
|
||||
|
||||
@ -829,7 +886,8 @@ __kernel void arithm_s_bitwise_or_C4_D3 (__global short *src1, int src1_step,
|
||||
*((__global short4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_or_C4_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C4_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
int4 src2, int rows, int cols, int dst_step1)
|
||||
|
||||
@ -850,7 +908,8 @@ __kernel void arithm_s_bitwise_or_C4_D4 (__global int *src1, int src1_step, in
|
||||
*((__global int4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_or_C4_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C4_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
char16 src2, int rows, int cols, int dst_step1)
|
||||
|
||||
@ -874,7 +933,8 @@ __kernel void arithm_s_bitwise_or_C4_D5 (__global char *src1, int src1_step, i
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_s_bitwise_or_C4_D6 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_C4_D6 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
short16 src2, int rows, int cols, int dst_step1)
|
||||
|
||||
|
@ -43,14 +43,18 @@
|
||||
//
|
||||
//M*/
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////BITWISE_OR////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/**************************************bitwise_or with scalar with mask**************************************/
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C1_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
uchar4 src2, int rows, int cols, int dst_step1)
|
||||
@ -64,6 +68,9 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int s
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -89,7 +96,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int s
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C1_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
char4 src2, int rows, int cols, int dst_step1)
|
||||
@ -103,6 +111,9 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D1 (__global char *src1, int sr
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -127,7 +138,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D1 (__global char *src1, int sr
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C1_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
ushort4 src2, int rows, int cols, int dst_step1)
|
||||
@ -141,6 +153,9 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -162,7 +177,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int
|
||||
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C1_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
short4 src2, int rows, int cols, int dst_step1)
|
||||
@ -176,6 +192,9 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D3 (__global short *src1, int s
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -197,7 +216,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D3 (__global short *src1, int s
|
||||
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C1_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
int4 src2, int rows, int cols, int dst_step1)
|
||||
@ -226,7 +246,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D4 (__global int *src1, int s
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C1_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
char16 src2, int rows, int cols, int dst_step1)
|
||||
@ -254,9 +275,9 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D5 (__global char *src1, int
|
||||
*((__global char4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C1_D6 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C1_D6 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
short16 src2, int rows, int cols, int dst_step1)
|
||||
@ -285,7 +306,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D6 (__global short *src1, int src
|
||||
}
|
||||
}
|
||||
#endif
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C2_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
uchar4 src2, int rows, int cols, int dst_step1)
|
||||
@ -299,6 +321,9 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int s
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -322,7 +347,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int s
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C2_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
char4 src2, int rows, int cols, int dst_step1)
|
||||
@ -336,6 +362,9 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D1 (__global char *src1, int sr
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -358,7 +387,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D1 (__global char *src1, int sr
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C2_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
ushort4 src2, int rows, int cols, int dst_step1)
|
||||
@ -386,7 +416,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D2 (__global ushort *src1, int
|
||||
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C2_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
short4 src2, int rows, int cols, int dst_step1)
|
||||
@ -414,7 +445,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D3 (__global short *src1, int s
|
||||
*((__global short2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C2_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
int4 src2, int rows, int cols, int dst_step1)
|
||||
@ -442,7 +474,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D4 (__global int *src1, int src
|
||||
*((__global int2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C2_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
char16 src2, int rows, int cols, int dst_step1)
|
||||
@ -470,7 +503,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D5 (__global char *src1, int sr
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C2_D6 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C2_D6 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
short16 src2, int rows, int cols, int dst_step1)
|
||||
@ -499,7 +533,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D6 (__global char *src1, int sr
|
||||
}
|
||||
}
|
||||
#endif
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C3_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
uchar4 src2, int rows, int cols, int dst_step1)
|
||||
@ -513,6 +548,9 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int s
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -560,7 +598,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int s
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C3_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
char4 src2, int rows, int cols, int dst_step1)
|
||||
@ -574,6 +613,9 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D1 (__global char *src1, int sr
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -620,7 +662,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D1 (__global char *src1, int sr
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C3_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
ushort4 src2, int rows, int cols, int dst_step1)
|
||||
@ -634,6 +677,9 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -675,7 +721,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int
|
||||
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C3_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
short4 src2, int rows, int cols, int dst_step1)
|
||||
@ -689,6 +736,9 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D3 (__global short *src1, int s
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -730,7 +780,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D3 (__global short *src1, int s
|
||||
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C3_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
int4 src2, int rows, int cols, int dst_step1)
|
||||
@ -773,7 +824,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D4 (__global int *src1, int src
|
||||
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C3_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
char16 src2, int rows, int cols, int dst_step1)
|
||||
@ -818,7 +870,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D5 (__global char *src1, int sr
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C3_D6 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C3_D6 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
short16 src2, int rows, int cols, int dst_step1)
|
||||
@ -861,7 +914,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D6 (__global short *src1, int src
|
||||
}
|
||||
}
|
||||
#endif
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C4_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
uchar4 src2, int rows, int cols, int dst_step1)
|
||||
@ -890,7 +944,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D0 (__global uchar *src1, int s
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C4_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
char4 src2, int rows, int cols, int dst_step1)
|
||||
@ -918,7 +973,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D1 (__global char *src1, int sr
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C4_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
ushort4 src2, int rows, int cols, int dst_step1)
|
||||
@ -945,7 +1001,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D2 (__global ushort *src1, int
|
||||
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C4_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
short4 src2, int rows, int cols, int dst_step1)
|
||||
@ -972,7 +1029,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D3 (__global short *src1, int s
|
||||
*((__global short4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C4_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
int4 src2, int rows, int cols, int dst_step1)
|
||||
@ -999,7 +1057,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D4 (__global int *src1, int src
|
||||
*((__global int4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C4_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
char16 src2, int rows, int cols, int dst_step1)
|
||||
@ -1029,7 +1088,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D5 (__global char *src1, int sr
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C4_D6 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_or_with_mask_C4_D6 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
short16 src2, int rows, int cols, int dst_step1)
|
||||
|
@ -43,9 +43,12 @@
|
||||
//
|
||||
//M*/
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////BITWISE_XOR////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -62,6 +65,9 @@ __kernel void arithm_bitwise_xor_D0 (__global uchar *src1, int src1_step, int sr
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
||||
@ -112,6 +118,9 @@ __kernel void arithm_bitwise_xor_D1 (__global char *src1, int src1_step, int src
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
||||
@ -163,6 +172,9 @@ __kernel void arithm_bitwise_xor_D2 (__global ushort *src1, int src1_step, int s
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -215,6 +227,9 @@ __kernel void arithm_bitwise_xor_D3 (__global short *src1, int src1_step, int sr
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -301,7 +316,6 @@ __kernel void arithm_bitwise_xor_D5 (__global char *src1, int src1_step, int src
|
||||
*((__global char4 *)((__global char *)dst + dst_index)) = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_bitwise_xor_D6 (__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
|
@ -43,14 +43,18 @@
|
||||
//
|
||||
//M*/
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////BITWISE_XOR////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/**************************************bitwise_xor with mask**************************************/
|
||||
__kernel void arithm_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C1_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
@ -64,6 +68,9 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int src1
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
||||
@ -91,7 +98,8 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int src1
|
||||
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_xor_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C1_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -105,6 +113,9 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D1 (__global char *src1, int src1_
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
||||
@ -132,7 +143,8 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D1 (__global char *src1, int src1_
|
||||
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C1_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
@ -146,6 +158,9 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int src
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -171,7 +186,8 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int src
|
||||
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_xor_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C1_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
@ -185,6 +201,9 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D3 (__global short *src1, int src1
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -210,7 +229,8 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D3 (__global short *src1, int src1
|
||||
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_xor_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C1_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
@ -242,7 +262,8 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D4 (__global int *src1, int src1
|
||||
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_xor_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C1_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -273,9 +294,9 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D5 (__global char *src1, int src1_
|
||||
}
|
||||
|
||||
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_bitwise_xor_with_mask_C1_D6 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C1_D6 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -308,8 +329,8 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D6 (__global char *src1, int src1_
|
||||
|
||||
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C2_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
@ -323,6 +344,9 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int src1
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -347,7 +371,8 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int src1
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_xor_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C2_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -361,6 +386,9 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D1 (__global char *src1, int src1_
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -384,7 +412,8 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D1 (__global char *src1, int src1_
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_bitwise_xor_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C2_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
@ -413,7 +442,8 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D2 (__global ushort *src1, int src
|
||||
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_xor_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C2_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
@ -442,7 +472,8 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D3 (__global short *src1, int src1
|
||||
*((__global short2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_xor_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C2_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
@ -471,7 +502,8 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D4 (__global int *src1, int src1
|
||||
*((__global int2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_xor_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C2_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -501,7 +533,8 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D5 (__global char *src1, int src1_
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_bitwise_xor_with_mask_C2_D6 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C2_D6 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -533,8 +566,8 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D6 (__global char *src1, int src1_
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C3_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
@ -548,6 +581,9 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int src1
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
|
||||
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
|
||||
@ -596,7 +632,8 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int src1
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_xor_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C3_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -610,6 +647,9 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D1 (__global char *src1, int src1_
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
|
||||
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
|
||||
@ -657,7 +697,8 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D1 (__global char *src1, int src1_
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C3_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
@ -671,6 +712,9 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int src
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
|
||||
@ -713,7 +757,8 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int src
|
||||
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_xor_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C3_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
@ -727,6 +772,9 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D3 (__global short *src1, int src1
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
|
||||
@ -769,7 +817,8 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D3 (__global short *src1, int src1
|
||||
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_xor_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C3_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
@ -813,7 +862,8 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D4 (__global int *src1, int src1
|
||||
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_xor_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C3_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -858,7 +908,8 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D5 (__global char *src1, int src1_
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_bitwise_xor_with_mask_C3_D6 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C3_D6 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -905,8 +956,8 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D6 (__global char *src1, int src1_
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_xor_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C4_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
@ -937,7 +988,8 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D0 (__global uchar *src1, int src1
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_bitwise_xor_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C4_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -967,7 +1019,8 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D1 (__global char *src1, int src1_
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_bitwise_xor_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C4_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
@ -996,7 +1049,8 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D2 (__global ushort *src1, int src
|
||||
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_xor_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C4_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
@ -1025,7 +1079,8 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D3 (__global short *src1, int src1
|
||||
*((__global short4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_xor_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C4_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
@ -1054,7 +1109,8 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D4 (__global int *src1, int src1
|
||||
*((__global int4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_bitwise_xor_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C4_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
@ -1084,7 +1140,8 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D5 (__global char *src1, int src1_
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_bitwise_xor_with_mask_C4_D6 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_bitwise_xor_with_mask_C4_D6 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *src2, int src2_step, int src2_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
|
@ -42,17 +42,19 @@
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//
|
||||
#if defined (__ATI__)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (__NVIDIA__)
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////BITWISE_XOR////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/**************************************xor with scalar without mask**************************************/
|
||||
__kernel void arithm_s_bitwise_xor_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C1_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
uchar4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -63,6 +65,9 @@ __kernel void arithm_s_bitwise_xor_C1_D0 (__global uchar *src1, int src1_step,
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
|
||||
@ -86,7 +91,8 @@ __kernel void arithm_s_bitwise_xor_C1_D0 (__global uchar *src1, int src1_step,
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_s_bitwise_xor_C1_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C1_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
char4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -97,6 +103,9 @@ __kernel void arithm_s_bitwise_xor_C1_D1 (__global char *src1, int src1_step,
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
|
||||
@ -119,7 +128,8 @@ __kernel void arithm_s_bitwise_xor_C1_D1 (__global char *src1, int src1_step,
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_xor_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C1_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
ushort4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -131,6 +141,9 @@ __kernel void arithm_s_bitwise_xor_C1_D2 (__global ushort *src1, int src1_step
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
|
||||
@ -150,7 +163,8 @@ __kernel void arithm_s_bitwise_xor_C1_D2 (__global ushort *src1, int src1_step
|
||||
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_xor_C1_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C1_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
short4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -162,6 +176,9 @@ __kernel void arithm_s_bitwise_xor_C1_D3 (__global short *src1, int src1_step,
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
|
||||
@ -181,7 +198,8 @@ __kernel void arithm_s_bitwise_xor_C1_D3 (__global short *src1, int src1_step,
|
||||
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_xor_C1_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C1_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
int4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -202,7 +220,8 @@ __kernel void arithm_s_bitwise_xor_C1_D4 (__global int *src1, int src1_step, i
|
||||
*((__global int *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_xor_C1_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C1_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
char16 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -234,7 +253,8 @@ __kernel void arithm_s_bitwise_xor_C1_D5 (__global char *src1, int src1_step,
|
||||
}
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_s_bitwise_xor_C1_D6 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C1_D6 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
short16 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -256,7 +276,8 @@ __kernel void arithm_s_bitwise_xor_C1_D6 (__global short *src1, int src1_step, i
|
||||
}
|
||||
}
|
||||
#endif
|
||||
__kernel void arithm_s_bitwise_xor_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C2_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
uchar4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -268,6 +289,9 @@ __kernel void arithm_s_bitwise_xor_C2_D0 (__global uchar *src1, int src1_step,
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
|
||||
@ -290,7 +314,8 @@ __kernel void arithm_s_bitwise_xor_C2_D0 (__global uchar *src1, int src1_step,
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_s_bitwise_xor_C2_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C2_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
char4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -302,6 +327,9 @@ __kernel void arithm_s_bitwise_xor_C2_D1 (__global char *src1, int src1_step,
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
|
||||
@ -322,7 +350,8 @@ __kernel void arithm_s_bitwise_xor_C2_D1 (__global char *src1, int src1_step,
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_xor_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C2_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
ushort4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -343,7 +372,8 @@ __kernel void arithm_s_bitwise_xor_C2_D2 (__global ushort *src1, int src1_step
|
||||
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_xor_C2_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C2_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
short4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -364,7 +394,8 @@ __kernel void arithm_s_bitwise_xor_C2_D3 (__global short *src1, int src1_step,
|
||||
*((__global short2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_xor_C2_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C2_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
int4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -384,7 +415,8 @@ __kernel void arithm_s_bitwise_xor_C2_D4 (__global int *src1, int src1_step, i
|
||||
*((__global int2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_xor_C2_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C2_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
char16 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -406,7 +438,8 @@ __kernel void arithm_s_bitwise_xor_C2_D5 (__global char *src1, int src1_step,
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_s_bitwise_xor_C2_D6 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C2_D6 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
short16 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -428,7 +461,8 @@ __kernel void arithm_s_bitwise_xor_C2_D6 (__global short *src1, int src1_step, i
|
||||
}
|
||||
}
|
||||
#endif
|
||||
__kernel void arithm_s_bitwise_xor_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C3_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
uchar4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -440,6 +474,9 @@ __kernel void arithm_s_bitwise_xor_C3_D0 (__global uchar *src1, int src1_step,
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
|
||||
|
||||
@ -484,7 +521,8 @@ __kernel void arithm_s_bitwise_xor_C3_D0 (__global uchar *src1, int src1_step,
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_s_bitwise_xor_C3_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C3_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
char4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -496,6 +534,9 @@ __kernel void arithm_s_bitwise_xor_C3_D1 (__global char *src1, int src1_step,
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
|
||||
|
||||
@ -539,7 +580,8 @@ __kernel void arithm_s_bitwise_xor_C3_D1 (__global char *src1, int src1_step,
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_xor_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C3_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
ushort4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -551,6 +593,9 @@ __kernel void arithm_s_bitwise_xor_C3_D2 (__global ushort *src1, int src1_step
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
|
||||
@ -589,7 +634,8 @@ __kernel void arithm_s_bitwise_xor_C3_D2 (__global ushort *src1, int src1_step
|
||||
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_xor_C3_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C3_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
short4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -601,6 +647,9 @@ __kernel void arithm_s_bitwise_xor_C3_D3 (__global short *src1, int src1_step,
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
|
||||
@ -639,7 +688,8 @@ __kernel void arithm_s_bitwise_xor_C3_D3 (__global short *src1, int src1_step,
|
||||
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_xor_C3_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C3_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
int4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -673,7 +723,8 @@ __kernel void arithm_s_bitwise_xor_C3_D4 (__global int *src1, int src1_step, i
|
||||
*((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_xor_C3_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C3_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
char16 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -708,7 +759,8 @@ __kernel void arithm_s_bitwise_xor_C3_D5 (__global char *src1, int src1_step,
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_s_bitwise_xor_C3_D6 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C3_D6 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
short16 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -743,7 +795,8 @@ __kernel void arithm_s_bitwise_xor_C3_D6 (__global short *src1, int src1_step, i
|
||||
}
|
||||
}
|
||||
#endif
|
||||
__kernel void arithm_s_bitwise_xor_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C4_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
uchar4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -765,7 +818,8 @@ __kernel void arithm_s_bitwise_xor_C4_D0 (__global uchar *src1, int src1_step,
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_s_bitwise_xor_C4_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C4_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
char4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -786,7 +840,8 @@ __kernel void arithm_s_bitwise_xor_C4_D1 (__global char *src1, int src1_step,
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_xor_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C4_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
ushort4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -806,7 +861,8 @@ __kernel void arithm_s_bitwise_xor_C4_D2 (__global ushort *src1, int src1_step
|
||||
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_xor_C4_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C4_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
short4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -826,7 +882,8 @@ __kernel void arithm_s_bitwise_xor_C4_D3 (__global short *src1, int src1_step,
|
||||
*((__global short4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_xor_C4_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C4_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
int4 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -846,7 +903,8 @@ __kernel void arithm_s_bitwise_xor_C4_D4 (__global int *src1, int src1_step, i
|
||||
*((__global int4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_xor_C4_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C4_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
char16 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
@ -869,7 +927,8 @@ __kernel void arithm_s_bitwise_xor_C4_D5 (__global char *src1, int src1_step,
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_s_bitwise_xor_C4_D6 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_C4_D6 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
short16 src2, int rows, int cols, int dst_step1)
|
||||
{
|
||||
|
@ -42,17 +42,20 @@
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
#if defined (__ATI__)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (__NVIDIA__)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////BITWISE_XOR////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/**************************************bitwise_xor with scalar with mask**************************************/
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C1_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
uchar4 src2, int rows, int cols, int dst_step1)
|
||||
@ -65,6 +68,9 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -90,7 +96,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C1_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
char4 src2, int rows, int cols, int dst_step1)
|
||||
@ -103,6 +110,9 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D1 (__global char *src1, int s
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -127,7 +137,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D1 (__global char *src1, int s
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C1_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
ushort4 src2, int rows, int cols, int dst_step1)
|
||||
@ -140,6 +151,9 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -161,7 +175,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int
|
||||
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C1_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
short4 src2, int rows, int cols, int dst_step1)
|
||||
@ -174,6 +189,9 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D3 (__global short *src1, int
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -195,7 +213,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D3 (__global short *src1, int
|
||||
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C1_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
int4 src2, int rows, int cols, int dst_step1)
|
||||
@ -223,7 +242,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D4 (__global int *src1, int
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C1_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
char16 src2, int rows, int cols, int dst_step1)
|
||||
@ -252,7 +272,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D5 (__global char *src1, int src
|
||||
}
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C1_D6 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C1_D6 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
short16 src2, int rows, int cols, int dst_step1)
|
||||
@ -280,7 +301,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D6 (__global short *src1, int sr
|
||||
}
|
||||
}
|
||||
#endif
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C2_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
uchar4 src2, int rows, int cols, int dst_step1)
|
||||
@ -293,6 +315,9 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -316,7 +341,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C2_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
char4 src2, int rows, int cols, int dst_step1)
|
||||
@ -329,6 +355,9 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D1 (__global char *src1, int s
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -351,7 +380,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D1 (__global char *src1, int s
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C2_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
ushort4 src2, int rows, int cols, int dst_step1)
|
||||
@ -378,7 +408,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D2 (__global ushort *src1, int
|
||||
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C2_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
short4 src2, int rows, int cols, int dst_step1)
|
||||
@ -405,7 +436,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D3 (__global short *src1, int
|
||||
*((__global short2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C2_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
int4 src2, int rows, int cols, int dst_step1)
|
||||
@ -432,7 +464,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D4 (__global int *src1, int sr
|
||||
*((__global int2 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C2_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
char16 src2, int rows, int cols, int dst_step1)
|
||||
@ -461,7 +494,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D5 (__global char *src1, int s
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C2_D6 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C2_D6 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
short16 src2, int rows, int cols, int dst_step1)
|
||||
@ -489,7 +523,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D6 (__global short *src1, int sr
|
||||
}
|
||||
}
|
||||
#endif
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C3_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
uchar4 src2, int rows, int cols, int dst_step1)
|
||||
@ -502,6 +537,9 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -549,7 +587,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C3_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
char4 src2, int rows, int cols, int dst_step1)
|
||||
@ -562,6 +601,9 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D1 (__global char *src1, int s
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -608,7 +650,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D1 (__global char *src1, int s
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C3_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
ushort4 src2, int rows, int cols, int dst_step1)
|
||||
@ -621,6 +664,9 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -662,7 +708,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int
|
||||
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C3_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
short4 src2, int rows, int cols, int dst_step1)
|
||||
@ -675,6 +722,9 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D3 (__global short *src1, int
|
||||
{
|
||||
x = x << 1;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -716,7 +766,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D3 (__global short *src1, int
|
||||
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C3_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
int4 src2, int rows, int cols, int dst_step1)
|
||||
@ -758,7 +809,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D4 (__global int *src1, int sr
|
||||
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C3_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
char16 src2, int rows, int cols, int dst_step1)
|
||||
@ -801,7 +853,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D5 (__global char *src1, int s
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C3_D6 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C3_D6 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
short16 src2, int rows, int cols, int dst_step1)
|
||||
@ -844,7 +897,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D6 (__global short *src1, int sr
|
||||
}
|
||||
}
|
||||
#endif
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C4_D0 (
|
||||
__global uchar *src1, int src1_step, int src1_offset,
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
uchar4 src2, int rows, int cols, int dst_step1)
|
||||
@ -872,7 +926,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D0 (__global uchar *src1, int
|
||||
}
|
||||
|
||||
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C4_D1 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
char4 src2, int rows, int cols, int dst_step1)
|
||||
@ -899,7 +954,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D1 (__global char *src1, int s
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C4_D2 (
|
||||
__global ushort *src1, int src1_step, int src1_offset,
|
||||
__global ushort *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
ushort4 src2, int rows, int cols, int dst_step1)
|
||||
@ -925,7 +981,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D2 (__global ushort *src1, int
|
||||
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C4_D3 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
short4 src2, int rows, int cols, int dst_step1)
|
||||
@ -951,7 +1008,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D3 (__global short *src1, int
|
||||
*((__global short4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C4_D4 (
|
||||
__global int *src1, int src1_step, int src1_offset,
|
||||
__global int *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
int4 src2, int rows, int cols, int dst_step1)
|
||||
@ -977,7 +1035,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D4 (__global int *src1, int sr
|
||||
*((__global int4 *)((__global char *)dst + dst_index)) = data;
|
||||
}
|
||||
}
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C4_D5 (
|
||||
__global char *src1, int src1_step, int src1_offset,
|
||||
__global char *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
char16 src2, int rows, int cols, int dst_step1)
|
||||
@ -1006,7 +1065,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D5 (__global char *src1, int s
|
||||
}
|
||||
}
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C4_D6 (__global short *src1, int src1_step, int src1_offset,
|
||||
__kernel void arithm_s_bitwise_xor_with_mask_C4_D6 (
|
||||
__global short *src1, int src1_step, int src1_offset,
|
||||
__global short *dst, int dst_step, int dst_offset,
|
||||
__global uchar *mask, int mask_step, int mask_offset,
|
||||
short16 src2, int rows, int cols, int dst_step1)
|
||||
|
@ -43,7 +43,11 @@
|
||||
//
|
||||
//M*/
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -62,6 +66,9 @@ __kernel void arithm_compare_eq_D0 (__global uchar *src1, int src1_step, int src
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
||||
@ -114,6 +121,9 @@ __kernel void arithm_compare_ne_D2 (__global ushort *src1, int src1_step, int sr
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1)& 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -165,6 +175,9 @@ __kernel void arithm_compare_eq_D3 (__global short *src1, int src1_step, int src
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -217,6 +230,9 @@ __kernel void arithm_compare_eq_D4 (__global int *src1, int src1_step, int src1_
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
x = x << 2;
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 2) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
|
||||
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
|
||||
@ -265,6 +281,9 @@ __kernel void arithm_compare_eq_D5 (__global float *src1, int src1_step, int src
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
x = x << 2;
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 2) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
|
||||
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
|
||||
@ -275,7 +294,8 @@ __kernel void arithm_compare_eq_D5 (__global float *src1, int src1_step, int src
|
||||
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix));
|
||||
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix)); if(src2_index < 0)
|
||||
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix));
|
||||
if(src2_index < 0)
|
||||
{
|
||||
float4 tmp;
|
||||
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||
@ -307,6 +327,9 @@ __kernel void arithm_compare_eq_D6 (__global double *src1, int src1_step, int sr
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
x = x << 2;
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 3) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
|
||||
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
|
||||
@ -358,6 +381,9 @@ __kernel void arithm_compare_gt_D0 (__global uchar *src1, int src1_step, int src
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
||||
@ -409,6 +435,9 @@ __kernel void arithm_compare_gt_D2 (__global ushort *src1, int src1_step, int sr
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -462,6 +491,9 @@ __kernel void arithm_compare_gt_D3 (__global short *src1, int src1_step, int src
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -511,6 +543,9 @@ __kernel void arithm_compare_gt_D4 (__global int *src1, int src1_step, int src1_
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
x = x << 2;
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 2) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
|
||||
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
|
||||
@ -560,6 +595,9 @@ __kernel void arithm_compare_gt_D5 (__global float *src1, int src1_step, int src
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
x = x << 2;
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 2) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
|
||||
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
|
||||
@ -609,6 +647,9 @@ __kernel void arithm_compare_gt_D6 (__global double *src1, int src1_step, int sr
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
x = x << 2;
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 3) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
|
||||
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
|
||||
@ -660,6 +701,9 @@ __kernel void arithm_compare_ge_D0 (__global uchar *src1, int src1_step, int src
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
||||
@ -714,6 +758,9 @@ __kernel void arithm_compare_ge_D2 (__global ushort *src1, int src1_step, int sr
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -769,6 +816,9 @@ __kernel void arithm_compare_ge_D3 (__global short *src1, int src1_step, int src
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1)& 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -820,6 +870,9 @@ __kernel void arithm_compare_ge_D4 (__global int *src1, int src1_step, int src1_
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 2)& 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
|
||||
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
|
||||
@ -869,6 +922,9 @@ __kernel void arithm_compare_ge_D5 (__global float *src1, int src1_step, int src
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 2)& 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
|
||||
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
|
||||
@ -920,6 +976,9 @@ __kernel void arithm_compare_ge_D6 (__global double *src1, int src1_step, int sr
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 3)& 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
|
||||
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
|
||||
@ -942,7 +1001,8 @@ __kernel void arithm_compare_ge_D6 (__global double *src1, int src1_step, int sr
|
||||
double4 tmp;
|
||||
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
|
||||
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
|
||||
} uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||
}
|
||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||
uchar4 tmp_data = convert_uchar4((src1_data >= src2_data));
|
||||
|
||||
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
|
||||
@ -954,3 +1014,4 @@ __kernel void arithm_compare_ge_D6 (__global double *src1, int src1_step, int sr
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -43,7 +43,11 @@
|
||||
//
|
||||
//M*/
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
/***********************************Compare NE*******************************/
|
||||
__kernel void arithm_compare_ne_D0 (__global uchar *src1, int src1_step, int src1_offset,
|
||||
@ -58,6 +62,9 @@ __kernel void arithm_compare_ne_D0 (__global uchar *src1, int src1_step, int src
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
||||
@ -110,6 +117,9 @@ __kernel void arithm_compare_ne_D2 (__global ushort *src1, int src1_step, int sr
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1)& 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -162,6 +172,9 @@ __kernel void arithm_compare_ne_D3 (__global short *src1, int src1_step, int src
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1)& 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -210,6 +223,9 @@ __kernel void arithm_compare_ne_D4 (__global int *src1, int src1_step, int src1_
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
x = x << 2;
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 2)& 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
|
||||
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
|
||||
@ -259,6 +275,9 @@ __kernel void arithm_compare_ne_D5 (__global float *src1, int src1_step, int src
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
x = x << 2;
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 2) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
|
||||
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
|
||||
@ -269,7 +288,8 @@ __kernel void arithm_compare_ne_D5 (__global float *src1, int src1_step, int src
|
||||
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
|
||||
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
|
||||
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix));
|
||||
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix)); if(src1_index < 0)
|
||||
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix));
|
||||
if(src1_index < 0)
|
||||
{
|
||||
float4 tmp;
|
||||
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
|
||||
@ -306,6 +326,9 @@ __kernel void arithm_compare_ne_D6 (__global double *src1, int src1_step, int sr
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
x = x << 2;
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 3) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
|
||||
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
|
||||
@ -358,6 +381,9 @@ __kernel void arithm_compare_lt_D0 (__global uchar *src1, int src1_step, int src
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
||||
@ -410,6 +436,9 @@ __kernel void arithm_compare_lt_D2 (__global ushort *src1, int src1_step, int sr
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -463,6 +492,9 @@ __kernel void arithm_compare_lt_D3 (__global short *src1, int src1_step, int src
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -512,6 +544,9 @@ __kernel void arithm_compare_lt_D4 (__global int *src1, int src1_step, int src1_
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
x = x << 2;
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 2) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
|
||||
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
|
||||
@ -564,6 +599,9 @@ __kernel void arithm_compare_lt_D5 (__global float *src1, int src1_step, int src
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
x = x << 2;
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 2) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
|
||||
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
|
||||
@ -613,6 +651,9 @@ __kernel void arithm_compare_lt_D6 (__global double *src1, int src1_step, int sr
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
x = x << 2;
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 3) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
|
||||
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
|
||||
@ -664,6 +705,9 @@ __kernel void arithm_compare_le_D0 (__global uchar *src1, int src1_step, int src
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
||||
@ -717,6 +761,9 @@ __kernel void arithm_compare_le_D2 (__global ushort *src1, int src1_step, int sr
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -770,6 +817,9 @@ __kernel void arithm_compare_le_D3 (__global short *src1, int src1_step, int src
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -819,6 +869,9 @@ __kernel void arithm_compare_le_D4 (__global int *src1, int src1_step, int src1_
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
x = x << 2;
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 2)& 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
|
||||
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
|
||||
@ -867,6 +920,9 @@ __kernel void arithm_compare_le_D5 (__global float *src1, int src1_step, int src
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
x = x << 2;
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 2)& 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
|
||||
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
|
||||
@ -915,6 +971,9 @@ __kernel void arithm_compare_le_D6 (__global double *src1, int src1_step, int sr
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
x = x << 2;
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 3)& 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
|
||||
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
|
||||
@ -952,3 +1011,5 @@ __kernel void arithm_compare_le_D6 (__global double *src1, int src1_step, int sr
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -44,7 +44,11 @@
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif
|
||||
typedef double F ;
|
||||
typedef double4 F4;
|
||||
#define convert_F4 convert_double4
|
||||
@ -56,34 +60,24 @@ typedef float4 F4;
|
||||
#define convert_F float
|
||||
#endif
|
||||
|
||||
uchar round2_uchar(F v){
|
||||
|
||||
uchar v1 = convert_uchar_sat(round(v));
|
||||
//uchar v2 = convert_uchar_sat(v+(v>=0 ? 0.5 : -0.5));
|
||||
|
||||
return v1;//(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
|
||||
inline uchar round2_uchar(F v)
|
||||
{
|
||||
return convert_uchar_sat(round(v));
|
||||
}
|
||||
|
||||
ushort round2_ushort(F v){
|
||||
|
||||
ushort v1 = convert_ushort_sat(round(v));
|
||||
//ushort v2 = convert_ushort_sat(v+(v>=0 ? 0.5 : -0.5));
|
||||
|
||||
return v1;//(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
|
||||
inline ushort round2_ushort(F v)
|
||||
{
|
||||
return convert_ushort_sat(round(v));
|
||||
}
|
||||
short round2_short(F v){
|
||||
|
||||
short v1 = convert_short_sat(round(v));
|
||||
//short v2 = convert_short_sat(v+(v>=0 ? 0.5 : -0.5));
|
||||
|
||||
return v1;//(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
|
||||
inline short round2_short(F v)
|
||||
{
|
||||
return convert_short_sat(round(v));
|
||||
}
|
||||
int round2_int(F v){
|
||||
|
||||
int v1 = convert_int_sat(round(v));
|
||||
//int v2 = convert_int_sat(v+(v>=0 ? 0.5 : -0.5));
|
||||
|
||||
return v1;//(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
|
||||
inline int round2_int(F v)
|
||||
{
|
||||
return convert_int_sat(round(v));
|
||||
}
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////divide///////////////////////////////////////////////////
|
||||
@ -94,39 +88,41 @@ __kernel void arithm_div_D0 (__global uchar *src1, int src1_step, int src1_offse
|
||||
__global uchar *dst, int dst_step, int dst_offset,
|
||||
int rows, int cols, int dst_step1, F scalar)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
int2 coor = (int2)(get_global_id(0), get_global_id(1));
|
||||
|
||||
if (x < cols && y < rows)
|
||||
if (coor.x < cols && coor.y < rows)
|
||||
{
|
||||
x = x << 2;
|
||||
coor.x = coor.x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
||||
int2 src_index = (int2)(mad24(coor.y, src1_step, coor.x + src1_offset - dst_align),
|
||||
mad24(coor.y, src2_step, coor.x + src2_offset - dst_align));
|
||||
|
||||
int dst_start = mad24(y, dst_step, dst_offset);
|
||||
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
|
||||
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
|
||||
int4 dst_args = (int4)(mad24(coor.y, dst_step, dst_offset),
|
||||
mad24(coor.y, dst_step, dst_offset + dst_step1),
|
||||
mad24(coor.y, dst_step, dst_offset + coor.x & (int)0xfffffffc),
|
||||
0);
|
||||
|
||||
uchar4 src1_data = vload4(0, src1 + src1_index);
|
||||
uchar4 src2_data = vload4(0, src2 + src2_index);
|
||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
|
||||
uchar4 src1_data = vload4(0, src1 + src_index.x);
|
||||
uchar4 src2_data = vload4(0, src2 + src_index.y);
|
||||
uchar4 dst_data = *((__global uchar4 *)(dst + dst_args.z));
|
||||
|
||||
F4 tmp = convert_F4(src1_data) * scalar;
|
||||
|
||||
uchar4 tmp_data;
|
||||
tmp_data.x = ((tmp.x == 0) || (src2_data.x == 0)) ? 0 : round2_uchar(tmp.x / (F)src2_data.x);
|
||||
tmp_data.y = ((tmp.y == 0) || (src2_data.y == 0)) ? 0 : round2_uchar(tmp.y / (F)src2_data.y);
|
||||
tmp_data.z = ((tmp.z == 0) || (src2_data.z == 0)) ? 0 : round2_uchar(tmp.z / (F)src2_data.z);
|
||||
tmp_data.w = ((tmp.w == 0) || (src2_data.w == 0)) ? 0 : round2_uchar(tmp.w / (F)src2_data.w);
|
||||
tmp_data.x = ((tmp.x == 0) || (src2_data.x == 0)) ? 0 : round2_uchar(tmp.x / src2_data.x);
|
||||
tmp_data.y = ((tmp.y == 0) || (src2_data.y == 0)) ? 0 : round2_uchar(tmp.y / src2_data.y);
|
||||
tmp_data.z = ((tmp.z == 0) || (src2_data.z == 0)) ? 0 : round2_uchar(tmp.z / src2_data.z);
|
||||
tmp_data.w = ((tmp.w == 0) || (src2_data.w == 0)) ? 0 : round2_uchar(tmp.w / src2_data.w);
|
||||
|
||||
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
|
||||
dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y;
|
||||
dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z;
|
||||
dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w;
|
||||
dst_data.x = ((dst_args.z + 0 >= dst_args.x) && (dst_args.z + 0 < dst_args.y)) ? tmp_data.x : dst_data.x;
|
||||
dst_data.y = ((dst_args.z + 1 >= dst_args.x) && (dst_args.z + 1 < dst_args.y)) ? tmp_data.y : dst_data.y;
|
||||
dst_data.z = ((dst_args.z + 2 >= dst_args.x) && (dst_args.z + 2 < dst_args.y)) ? tmp_data.z : dst_data.z;
|
||||
dst_data.w = ((dst_args.z + 3 >= dst_args.x) && (dst_args.z + 3 < dst_args.y)) ? tmp_data.w : dst_data.w;
|
||||
|
||||
*((__global uchar4 *)(dst + dst_index)) = dst_data;
|
||||
*((__global uchar4 *)(dst + dst_args.z)) = dst_data;
|
||||
}
|
||||
}
|
||||
|
||||
@ -142,6 +138,9 @@ __kernel void arithm_div_D2 (__global ushort *src1, int src1_step, int src1_offs
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -182,6 +181,9 @@ __kernel void arithm_div_D3 (__global short *src1, int src1_step, int src1_offse
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -297,6 +299,9 @@ __kernel void arithm_s_div_D0 (__global uchar *src, int src_step, int src_offset
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src_index = mad24(y, src_step, x + src_offset - dst_align);
|
||||
|
||||
@ -333,6 +338,9 @@ __kernel void arithm_s_div_D2 (__global ushort *src, int src_step, int src_offse
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src_index = mad24(y, src_step, (x << 1) + src_offset - (dst_align << 1));
|
||||
|
||||
@ -368,6 +376,9 @@ __kernel void arithm_s_div_D3 (__global short *src, int src_step, int src_offset
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src_index = mad24(y, src_step, (x << 1) + src_offset - (dst_align << 1));
|
||||
|
||||
@ -455,3 +466,5 @@ __kernel void arithm_s_div_D6 (__global double *src, int src_step, int src_offse
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -44,7 +44,11 @@
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -61,6 +65,9 @@ __kernel void arithm_flip_rows_D0 (__global uchar *src, int src_step, int src_of
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src_index_0 = mad24(y, src_step, x + src_offset - dst_align);
|
||||
int src_index_1 = mad24(rows - y - 1, src_step, x + src_offset - dst_align);
|
||||
@ -116,6 +123,9 @@ __kernel void arithm_flip_rows_D1 (__global char *src, int src_step, int src_off
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src_index_0 = mad24(y, src_step, x + src_offset - dst_align);
|
||||
int src_index_1 = mad24(rows - y - 1, src_step, x + src_offset - dst_align);
|
||||
@ -158,6 +168,9 @@ __kernel void arithm_flip_rows_D2 (__global ushort *src, int src_step, int src_o
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset >> 1) & 3) << 1)
|
||||
int src_index_0 = mad24(y, src_step, (x << 1) + src_offset - dst_align);
|
||||
int src_index_1 = mad24(rows - y - 1, src_step, (x << 1) + src_offset - dst_align);
|
||||
@ -200,6 +213,9 @@ __kernel void arithm_flip_rows_D3 (__global short *src, int src_step, int src_of
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (((dst_offset >> 1) & 3) << 1)
|
||||
int src_index_0 = mad24(y, src_step, (x << 1) + src_offset - dst_align);
|
||||
int src_index_1 = mad24(rows - y - 1, src_step, (x << 1) + src_offset - dst_align);
|
||||
|
@ -16,7 +16,6 @@
|
||||
//
|
||||
// @Authors
|
||||
// Jia Haipeng, jiahaipeng95@gmail.com
|
||||
// Dachuan Zhao, dachuan@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
@ -44,11 +43,16 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined DOUBLE_SUPPORT
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#elif defined (cl_amd_fp64)
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
int4 round_int4(float4 v){
|
||||
int4 round_int4(float4 v)
|
||||
{
|
||||
v.s0 = v.s0 + (v.s0 > 0 ? 0.5 : -0.5);
|
||||
v.s1 = v.s1 + (v.s1 > 0 ? 0.5 : -0.5);
|
||||
v.s2 = v.s2 + (v.s2 > 0 ? 0.5 : -0.5);
|
||||
@ -56,7 +60,8 @@ int4 round_int4(float4 v){
|
||||
|
||||
return convert_int4_sat(v);
|
||||
}
|
||||
uint4 round_uint4(float4 v){
|
||||
uint4 round_uint4(float4 v)
|
||||
{
|
||||
v.s0 = v.s0 + (v.s0 > 0 ? 0.5 : -0.5);
|
||||
v.s1 = v.s1 + (v.s1 > 0 ? 0.5 : -0.5);
|
||||
v.s2 = v.s2 + (v.s2 > 0 ? 0.5 : -0.5);
|
||||
@ -64,7 +69,8 @@ uint4 round_uint4(float4 v){
|
||||
|
||||
return convert_uint4_sat(v);
|
||||
}
|
||||
long round_int(float v){
|
||||
long round_int(float v)
|
||||
{
|
||||
v = v + (v > 0 ? 0.5 : -0.5);
|
||||
|
||||
return convert_int_sat(v);
|
||||
@ -85,6 +91,9 @@ __kernel void arithm_mul_D0 (__global uchar *src1, int src1_step, int src1_offse
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align (dst_offset & 3)
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
|
||||
@ -130,6 +139,9 @@ __kernel void arithm_mul_D2 (__global ushort *src1, int src1_step, int src1_offs
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
@ -166,6 +178,9 @@ __kernel void arithm_mul_D3 (__global short *src1, int src1_step, int src1_offse
|
||||
{
|
||||
x = x << 2;
|
||||
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
|
Loading…
x
Reference in New Issue
Block a user