Merge pull request #775 from bitwangyaoyao:2.4_fixerr

This commit is contained in:
Andrey Kamaev 2013-04-05 12:59:45 +04:00 committed by OpenCV Buildbot
commit a2d27429e4
23 changed files with 3080 additions and 2184 deletions

View File

@ -44,7 +44,11 @@
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
@ -63,6 +67,9 @@ __kernel void arithm_absdiff_D0 (__global uchar *src1, int src1_step, int src1_o
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@ -111,6 +118,9 @@ __kernel void arithm_absdiff_D2 (__global ushort *src1, int src1_step, int src1_
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -145,6 +155,9 @@ __kernel void arithm_absdiff_D3 (__global short *src1, int src1_step, int src1_o
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -249,6 +262,9 @@ __kernel void arithm_s_absdiff_C1_D0 (__global uchar *src1, int src1_step, int
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
@ -288,6 +304,9 @@ __kernel void arithm_s_absdiff_C1_D2 (__global ushort *src1, int src1_step, in
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
@ -319,6 +338,9 @@ __kernel void arithm_s_absdiff_C1_D3 (__global short *src1, int src1_step, int
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
@ -422,6 +444,9 @@ __kernel void arithm_s_absdiff_C2_D0 (__global uchar *src1, int src1_step, int
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
@ -564,6 +589,9 @@ __kernel void arithm_s_absdiff_C3_D0 (__global uchar *src1, int src1_step, int
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
@ -618,6 +646,9 @@ __kernel void arithm_s_absdiff_C3_D2 (__global ushort *src1, int src1_step, in
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
@ -668,6 +699,9 @@ __kernel void arithm_s_absdiff_C3_D3 (__global short *src1, int src1_step, int
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));

View File

@ -45,7 +45,11 @@
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
@ -64,6 +68,9 @@ __kernel void arithm_add_D0 (__global uchar *src1, int src1_step, int src1_offse
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@ -112,6 +119,9 @@ __kernel void arithm_add_D2 (__global ushort *src1, int src1_step, int src1_offs
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -147,6 +157,9 @@ __kernel void arithm_add_D3 (__global short *src1, int src1_step, int src1_offse
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -252,6 +265,9 @@ __kernel void arithm_add_with_mask_C1_D0 (__global uchar *src1, int src1_step, i
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@ -311,6 +327,9 @@ __kernel void arithm_add_with_mask_C1_D2 (__global ushort *src1, int src1_step,
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -348,6 +367,9 @@ __kernel void arithm_add_with_mask_C1_D3 (__global short *src1, int src1_step, i
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -477,6 +499,9 @@ __kernel void arithm_add_with_mask_C2_D0 (__global uchar *src1, int src1_step, i
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -664,6 +689,9 @@ __kernel void arithm_add_with_mask_C3_D0 (__global uchar *src1, int src1_step, i
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
@ -724,6 +752,9 @@ __kernel void arithm_add_with_mask_C3_D2 (__global ushort *src1, int src1_step,
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
@ -780,6 +811,9 @@ __kernel void arithm_add_with_mask_C3_D3 (__global short *src1, int src1_step, i
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));

View File

@ -42,8 +42,12 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if defined DOUBLE_SUPPORT
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
typedef double F;
#else
typedef float F;
@ -65,6 +69,9 @@ __kernel void addWeighted_D0 (__global uchar *src1,int src1_step,int src1_offset
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@ -122,6 +129,9 @@ __kernel void addWeighted_D2 (__global ushort *src1, int src1_step,int src1_offs
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset -( dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset -( dst_align << 1));
@ -182,6 +192,9 @@ __kernel void addWeighted_D3 (__global short *src1, int src1_step,int src1_offse
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset -( dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset -( dst_align << 1));
@ -243,6 +256,9 @@ __kernel void addWeighted_D4 (__global int *src1, int src1_step,int src1_offset,
#define bitOfInt (sizeof(int)== 4 ? 2: 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> bitOfInt) & 3)
int src1_index = mad24(y, src1_step, (x << bitOfInt) + src1_offset - (dst_align << bitOfInt));
@ -304,6 +320,9 @@ __kernel void addWeighted_D5 (__global float *src1,int src1_step,int src1_offset
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
@ -366,6 +385,9 @@ __kernel void addWeighted_D6 (__global double *src1, int src1_step,int src1_offs
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 3) & 3)
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));

View File

@ -44,9 +44,13 @@
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
/**************************************add with scalar without mask**************************************/
__kernel void arithm_s_add_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
@ -59,6 +63,9 @@ __kernel void arithm_s_add_C1_D0 (__global uchar *src1, int src1_step, int src
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
@ -99,6 +106,9 @@ __kernel void arithm_s_add_C1_D2 (__global ushort *src1, int src1_step, int sr
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
@ -131,6 +141,9 @@ __kernel void arithm_s_add_C1_D3 (__global short *src1, int src1_step, int src
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
@ -233,6 +246,9 @@ __kernel void arithm_s_add_C2_D0 (__global uchar *src1, int src1_step, int src
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
@ -378,6 +394,9 @@ __kernel void arithm_s_add_C3_D0 (__global uchar *src1, int src1_step, int src
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
@ -432,6 +451,9 @@ __kernel void arithm_s_add_C3_D2 (__global ushort *src1, int src1_step, int sr
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
@ -482,6 +504,9 @@ __kernel void arithm_s_add_C3_D3 (__global short *src1, int src1_step, int src
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));

View File

@ -44,7 +44,11 @@
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
/**************************************add with scalar with mask**************************************/
@ -61,6 +65,9 @@ __kernel void arithm_s_add_with_mask_C1_D0 (__global uchar *src1, int src1_ste
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -111,6 +118,9 @@ __kernel void arithm_s_add_with_mask_C1_D2 (__global ushort *src1, int src1_st
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -146,6 +156,9 @@ __kernel void arithm_s_add_with_mask_C1_D3 (__global short *src1, int src1_ste
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -267,6 +280,9 @@ __kernel void arithm_s_add_with_mask_C2_D0 (__global uchar *src1, int src1_ste
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -443,6 +459,9 @@ __kernel void arithm_s_add_with_mask_C3_D0 (__global uchar *src1, int src1_ste
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -501,6 +520,9 @@ __kernel void arithm_s_add_with_mask_C3_D2 (__global ushort *src1, int src1_st
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -555,6 +577,9 @@ __kernel void arithm_s_add_with_mask_C3_D3 (__global short *src1, int src1_ste
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);

View File

@ -43,7 +43,11 @@
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
@ -62,6 +66,9 @@ __kernel void arithm_bitwise_and_D0 (__global uchar *src1, int src1_step, int sr
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@ -112,6 +119,9 @@ __kernel void arithm_bitwise_and_D1 (__global char *src1, int src1_step, int src
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@ -163,6 +173,9 @@ __kernel void arithm_bitwise_and_D2 (__global ushort *src1, int src1_step, int s
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -215,6 +228,9 @@ __kernel void arithm_bitwise_and_D3 (__global short *src1, int src1_step, int sr
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));

View File

@ -43,14 +43,18 @@
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_AND////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_and with mask**************************************/
__kernel void arithm_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C1_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
@ -64,6 +68,9 @@ __kernel void arithm_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int src1
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@ -91,7 +98,8 @@ __kernel void arithm_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int src1
__kernel void arithm_bitwise_and_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -105,6 +113,9 @@ __kernel void arithm_bitwise_and_with_mask_C1_D1 (__global char *src1, int src1_
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@ -132,7 +143,8 @@ __kernel void arithm_bitwise_and_with_mask_C1_D1 (__global char *src1, int src1_
__kernel void arithm_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C1_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
@ -146,6 +158,9 @@ __kernel void arithm_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int src
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -171,7 +186,8 @@ __kernel void arithm_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int src
__kernel void arithm_bitwise_and_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
@ -185,6 +201,9 @@ __kernel void arithm_bitwise_and_with_mask_C1_D3 (__global short *src1, int src1
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -210,7 +229,8 @@ __kernel void arithm_bitwise_and_with_mask_C1_D3 (__global short *src1, int src1
__kernel void arithm_bitwise_and_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C1_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
@ -242,7 +262,8 @@ __kernel void arithm_bitwise_and_with_mask_C1_D4 (__global int *src1, int src1
__kernel void arithm_bitwise_and_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -274,8 +295,8 @@ __kernel void arithm_bitwise_and_with_mask_C1_D5 (__global char *src1, int src1_
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_and_with_mask_C1_D6 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C1_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -305,11 +326,11 @@ __kernel void arithm_bitwise_and_with_mask_C1_D6 (__global char *src1, int src1_
}
}
#endif
__kernel void arithm_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
@ -323,6 +344,9 @@ __kernel void arithm_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int src1
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -347,7 +371,8 @@ __kernel void arithm_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int src1
}
__kernel void arithm_bitwise_and_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C2_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -361,6 +386,9 @@ __kernel void arithm_bitwise_and_with_mask_C2_D1 (__global char *src1, int src1_
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -384,7 +412,8 @@ __kernel void arithm_bitwise_and_with_mask_C2_D1 (__global char *src1, int src1_
}
}
__kernel void arithm_bitwise_and_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
@ -413,7 +442,8 @@ __kernel void arithm_bitwise_and_with_mask_C2_D2 (__global ushort *src1, int src
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_and_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C2_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
@ -442,7 +472,8 @@ __kernel void arithm_bitwise_and_with_mask_C2_D3 (__global short *src1, int src1
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_and_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C2_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
@ -471,7 +502,8 @@ __kernel void arithm_bitwise_and_with_mask_C2_D4 (__global int *src1, int src1
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_and_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C2_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -500,8 +532,9 @@ __kernel void arithm_bitwise_and_with_mask_C2_D5 (__global char *src1, int src1_
*((__global char8 *)((__global char *)dst + dst_index)) = data;
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_and_with_mask_C2_D6 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C2_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -530,11 +563,11 @@ __kernel void arithm_bitwise_and_with_mask_C2_D6 (__global char *src1, int src1_
*((__global char16 *)((__global char *)dst + dst_index)) = data;
}
}
#endif
__kernel void arithm_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C3_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
@ -548,6 +581,9 @@ __kernel void arithm_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int src1
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
@ -596,7 +632,8 @@ __kernel void arithm_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int src1
}
__kernel void arithm_bitwise_and_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C3_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -610,6 +647,9 @@ __kernel void arithm_bitwise_and_with_mask_C3_D1 (__global char *src1, int src1_
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
@ -657,7 +697,8 @@ __kernel void arithm_bitwise_and_with_mask_C3_D1 (__global char *src1, int src1_
}
}
__kernel void arithm_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C3_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
@ -671,6 +712,9 @@ __kernel void arithm_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int src
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
@ -713,7 +757,8 @@ __kernel void arithm_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int src
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_bitwise_and_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C3_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
@ -727,6 +772,9 @@ __kernel void arithm_bitwise_and_with_mask_C3_D3 (__global short *src1, int src1
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
@ -769,7 +817,8 @@ __kernel void arithm_bitwise_and_with_mask_C3_D3 (__global short *src1, int src1
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_bitwise_and_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C3_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
@ -813,7 +862,8 @@ __kernel void arithm_bitwise_and_with_mask_C3_D4 (__global int *src1, int src1
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_bitwise_and_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C3_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -858,7 +908,8 @@ __kernel void arithm_bitwise_and_with_mask_C3_D5 (__global char *src1, int src1_
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_and_with_mask_C3_D6 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C3_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -905,8 +956,8 @@ __kernel void arithm_bitwise_and_with_mask_C3_D6 (__global char *src1, int src1_
#endif
__kernel void arithm_bitwise_and_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
@ -937,7 +988,8 @@ __kernel void arithm_bitwise_and_with_mask_C4_D0 (__global uchar *src1, int src1
}
__kernel void arithm_bitwise_and_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C4_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -967,7 +1019,8 @@ __kernel void arithm_bitwise_and_with_mask_C4_D1 (__global char *src1, int src1_
}
}
__kernel void arithm_bitwise_and_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C4_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
@ -996,7 +1049,8 @@ __kernel void arithm_bitwise_and_with_mask_C4_D2 (__global ushort *src1, int src
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_and_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C4_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
@ -1025,7 +1079,8 @@ __kernel void arithm_bitwise_and_with_mask_C4_D3 (__global short *src1, int src1
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_and_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C4_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
@ -1054,7 +1109,8 @@ __kernel void arithm_bitwise_and_with_mask_C4_D4 (__global int *src1, int src1
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_and_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C4_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -1084,7 +1140,8 @@ __kernel void arithm_bitwise_and_with_mask_C4_D5 (__global char *src1, int src1_
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_and_with_mask_C4_D6 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_and_with_mask_C4_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,

View File

@ -42,17 +42,20 @@
// the use of this software, even if advised of the possibility of such damage.
//
//
#if defined (__ATI__)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (__NVIDIA__)
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_AND////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************and with scalar without mask**************************************/
__kernel void arithm_s_bitwise_and_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C1_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
@ -63,6 +66,9 @@ __kernel void arithm_s_bitwise_and_C1_D0 (__global uchar *src1, int src1_step,
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
@ -86,7 +92,8 @@ __kernel void arithm_s_bitwise_and_C1_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_and_C1_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
@ -97,6 +104,9 @@ __kernel void arithm_s_bitwise_and_C1_D1 (__global char *src1, int src1_step,
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
@ -119,7 +129,8 @@ __kernel void arithm_s_bitwise_and_C1_D1 (__global char *src1, int src1_step,
}
}
__kernel void arithm_s_bitwise_and_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C1_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
@ -131,6 +142,9 @@ __kernel void arithm_s_bitwise_and_C1_D2 (__global ushort *src1, int src1_step
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
@ -150,7 +164,8 @@ __kernel void arithm_s_bitwise_and_C1_D2 (__global ushort *src1, int src1_step
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C1_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
@ -162,6 +177,9 @@ __kernel void arithm_s_bitwise_and_C1_D3 (__global short *src1, int src1_step,
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
@ -181,7 +199,8 @@ __kernel void arithm_s_bitwise_and_C1_D3 (__global short *src1, int src1_step,
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C1_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C1_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
@ -202,7 +221,8 @@ __kernel void arithm_s_bitwise_and_C1_D4 (__global int *src1, int src1_step, i
*((__global int *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C1_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
@ -232,9 +252,9 @@ __kernel void arithm_s_bitwise_and_C1_D5 (__global char *src1, int src1_step,
*((__global char4 *)((__global char *)dst + dst_index)) = data;
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_and_C1_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C1_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
@ -256,7 +276,8 @@ __kernel void arithm_s_bitwise_and_C1_D6 (__global short *src1, int src1_step, i
}
}
#endif
__kernel void arithm_s_bitwise_and_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
@ -268,6 +289,9 @@ __kernel void arithm_s_bitwise_and_C2_D0 (__global uchar *src1, int src1_step,
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
@ -290,7 +314,8 @@ __kernel void arithm_s_bitwise_and_C2_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_and_C2_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C2_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
@ -302,6 +327,9 @@ __kernel void arithm_s_bitwise_and_C2_D1 (__global char *src1, int src1_step,
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
@ -322,7 +350,8 @@ __kernel void arithm_s_bitwise_and_C2_D1 (__global char *src1, int src1_step,
}
}
__kernel void arithm_s_bitwise_and_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
@ -343,7 +372,8 @@ __kernel void arithm_s_bitwise_and_C2_D2 (__global ushort *src1, int src1_step
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C2_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C2_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
@ -364,7 +394,8 @@ __kernel void arithm_s_bitwise_and_C2_D3 (__global short *src1, int src1_step,
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C2_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C2_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
@ -384,7 +415,8 @@ __kernel void arithm_s_bitwise_and_C2_D4 (__global int *src1, int src1_step, i
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C2_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C2_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
@ -406,7 +438,8 @@ __kernel void arithm_s_bitwise_and_C2_D5 (__global char *src1, int src1_step,
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_and_C2_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C2_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
@ -428,7 +461,8 @@ __kernel void arithm_s_bitwise_and_C2_D6 (__global short *src1, int src1_step, i
}
}
#endif
__kernel void arithm_s_bitwise_and_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C3_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
@ -440,6 +474,9 @@ __kernel void arithm_s_bitwise_and_C3_D0 (__global uchar *src1, int src1_step,
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
@ -484,7 +521,8 @@ __kernel void arithm_s_bitwise_and_C3_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_and_C3_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C3_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
@ -496,6 +534,9 @@ __kernel void arithm_s_bitwise_and_C3_D1 (__global char *src1, int src1_step,
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
@ -539,7 +580,8 @@ __kernel void arithm_s_bitwise_and_C3_D1 (__global char *src1, int src1_step,
}
}
__kernel void arithm_s_bitwise_and_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C3_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
@ -551,6 +593,9 @@ __kernel void arithm_s_bitwise_and_C3_D2 (__global ushort *src1, int src1_step
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
@ -589,7 +634,8 @@ __kernel void arithm_s_bitwise_and_C3_D2 (__global ushort *src1, int src1_step
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_and_C3_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C3_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
@ -601,6 +647,9 @@ __kernel void arithm_s_bitwise_and_C3_D3 (__global short *src1, int src1_step,
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
@ -639,7 +688,8 @@ __kernel void arithm_s_bitwise_and_C3_D3 (__global short *src1, int src1_step,
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_and_C3_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C3_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
@ -673,7 +723,8 @@ __kernel void arithm_s_bitwise_and_C3_D4 (__global int *src1, int src1_step, i
*((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
__kernel void arithm_s_bitwise_and_C3_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C3_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
@ -708,7 +759,8 @@ __kernel void arithm_s_bitwise_and_C3_D5 (__global char *src1, int src1_step,
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_and_C3_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C3_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
@ -743,7 +795,8 @@ __kernel void arithm_s_bitwise_and_C3_D6 (__global short *src1, int src1_step, i
}
}
#endif
__kernel void arithm_s_bitwise_and_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
@ -765,7 +818,8 @@ __kernel void arithm_s_bitwise_and_C4_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_and_C4_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C4_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
@ -786,7 +840,8 @@ __kernel void arithm_s_bitwise_and_C4_D1 (__global char *src1, int src1_step,
}
}
__kernel void arithm_s_bitwise_and_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C4_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
@ -806,7 +861,8 @@ __kernel void arithm_s_bitwise_and_C4_D2 (__global ushort *src1, int src1_step
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C4_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C4_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
@ -826,7 +882,8 @@ __kernel void arithm_s_bitwise_and_C4_D3 (__global short *src1, int src1_step,
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C4_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C4_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
@ -846,7 +903,8 @@ __kernel void arithm_s_bitwise_and_C4_D4 (__global int *src1, int src1_step, i
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_C4_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C4_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
@ -869,7 +927,8 @@ __kernel void arithm_s_bitwise_and_C4_D5 (__global char *src1, int src1_step,
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_and_C4_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_C4_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{

View File

@ -42,17 +42,19 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if defined (__ATI__)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (__NVIDIA__)
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_AND////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_and with scalar with mask**************************************/
__kernel void arithm_s_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C1_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
@ -65,6 +67,9 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -90,7 +95,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int
}
__kernel void arithm_s_bitwise_and_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
@ -103,6 +109,9 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D1 (__global char *src1, int s
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -127,7 +136,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D1 (__global char *src1, int s
}
}
__kernel void arithm_s_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C1_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
@ -140,6 +150,9 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -161,7 +174,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
@ -174,6 +188,9 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D3 (__global short *src1, int
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -195,7 +212,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D3 (__global short *src1, int
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C1_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
@ -223,7 +241,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D4 (__global int *src1, int
}
}
__kernel void arithm_s_bitwise_and_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
@ -252,7 +271,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D5 (__global char *src1, int src
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_and_with_mask_C1_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C1_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
@ -280,7 +300,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D6 (__global short *src1, int sr
}
}
#endif
__kernel void arithm_s_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
@ -293,6 +314,9 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -316,7 +340,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int
}
__kernel void arithm_s_bitwise_and_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C2_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
@ -329,6 +354,9 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D1 (__global char *src1, int s
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -351,7 +379,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D1 (__global char *src1, int s
}
}
__kernel void arithm_s_bitwise_and_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
@ -378,7 +407,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D2 (__global ushort *src1, int
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C2_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
@ -405,7 +435,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D3 (__global short *src1, int
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C2_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
@ -432,7 +463,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D4 (__global int *src1, int sr
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C2_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
@ -461,7 +493,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D5 (__global char *src1, int s
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_and_with_mask_C2_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C2_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
@ -489,7 +522,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D6 (__global short *src1, int sr
}
}
#endif
__kernel void arithm_s_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C3_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
@ -502,6 +536,9 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -549,7 +586,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int
}
__kernel void arithm_s_bitwise_and_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C3_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
@ -562,6 +600,9 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D1 (__global char *src1, int s
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -608,7 +649,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D1 (__global char *src1, int s
}
}
__kernel void arithm_s_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C3_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
@ -621,6 +663,9 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -662,7 +707,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_and_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C3_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
@ -675,6 +721,9 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D3 (__global short *src1, int
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -716,7 +765,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D3 (__global short *src1, int
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_and_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C3_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
@ -758,7 +808,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D4 (__global int *src1, int sr
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_and_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C3_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
@ -801,7 +852,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D5 (__global char *src1, int s
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_and_with_mask_C3_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C3_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
@ -844,7 +896,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D6 (__global short *src1, int sr
}
}
#endif
__kernel void arithm_s_bitwise_and_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
@ -872,7 +925,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D0 (__global uchar *src1, int
}
__kernel void arithm_s_bitwise_and_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C4_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
@ -899,7 +953,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D1 (__global char *src1, int s
}
}
__kernel void arithm_s_bitwise_and_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C4_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
@ -925,7 +980,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D2 (__global ushort *src1, int
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C4_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
@ -951,7 +1007,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D3 (__global short *src1, int
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C4_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
@ -977,7 +1034,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D4 (__global int *src1, int sr
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_and_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C4_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
@ -1006,7 +1064,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D5 (__global char *src1, int s
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_and_with_mask_C4_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_and_with_mask_C4_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
@ -1055,3 +1114,4 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D6 (__global short *src1, int sr
}
}
#endif

View File

@ -43,9 +43,12 @@
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_NOT////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
@ -61,6 +64,9 @@ __kernel void arithm_bitwise_not_D0 (__global uchar *src1, int src1_step, int sr
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
@ -101,6 +107,9 @@ __kernel void arithm_bitwise_not_D1 (__global char *src1, int src1_step, int src
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
@ -135,6 +144,9 @@ __kernel void arithm_bitwise_not_D2 (__global ushort *src1, int src1_step, int s
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
@ -170,6 +182,9 @@ __kernel void arithm_bitwise_not_D3 (__global short *src1, int src1_step, int sr
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));

View File

@ -43,7 +43,11 @@
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
@ -62,6 +66,9 @@ __kernel void arithm_bitwise_or_D0 (__global uchar *src1, int src1_step, int src
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@ -110,6 +117,9 @@ __kernel void arithm_bitwise_or_D1 (__global char *src1, int src1_step, int src1
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@ -147,6 +157,9 @@ __kernel void arithm_bitwise_or_D2 (__global ushort *src1, int src1_step, int sr
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -185,6 +198,9 @@ __kernel void arithm_bitwise_or_D3 (__global short *src1, int src1_step, int src
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));

View File

@ -43,14 +43,18 @@
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_OR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_or with mask**************************************/
__kernel void arithm_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C1_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
@ -64,6 +68,9 @@ __kernel void arithm_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int src1_
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@ -91,7 +98,8 @@ __kernel void arithm_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int src1_
__kernel void arithm_bitwise_or_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -105,6 +113,9 @@ __kernel void arithm_bitwise_or_with_mask_C1_D1 (__global char *src1, int src1_s
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@ -132,7 +143,8 @@ __kernel void arithm_bitwise_or_with_mask_C1_D1 (__global char *src1, int src1_s
__kernel void arithm_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C1_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
@ -146,6 +158,9 @@ __kernel void arithm_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int src1
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -171,7 +186,8 @@ __kernel void arithm_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int src1
__kernel void arithm_bitwise_or_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
@ -185,6 +201,9 @@ __kernel void arithm_bitwise_or_with_mask_C1_D3 (__global short *src1, int src1_
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -210,7 +229,8 @@ __kernel void arithm_bitwise_or_with_mask_C1_D3 (__global short *src1, int src1_
__kernel void arithm_bitwise_or_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C1_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
@ -242,7 +262,8 @@ __kernel void arithm_bitwise_or_with_mask_C1_D4 (__global int *src1, int src1_
__kernel void arithm_bitwise_or_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -273,9 +294,9 @@ __kernel void arithm_bitwise_or_with_mask_C1_D5 (__global char *src1, int src1_s
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_or_with_mask_C1_D6 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C1_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -308,8 +329,8 @@ __kernel void arithm_bitwise_or_with_mask_C1_D6 (__global char *src1, int src1_s
#endif
__kernel void arithm_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
@ -323,6 +344,9 @@ __kernel void arithm_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int src1_
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -347,7 +371,8 @@ __kernel void arithm_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int src1_
}
__kernel void arithm_bitwise_or_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C2_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -361,6 +386,9 @@ __kernel void arithm_bitwise_or_with_mask_C2_D1 (__global char *src1, int src1_s
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -384,7 +412,8 @@ __kernel void arithm_bitwise_or_with_mask_C2_D1 (__global char *src1, int src1_s
}
}
__kernel void arithm_bitwise_or_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
@ -413,7 +442,8 @@ __kernel void arithm_bitwise_or_with_mask_C2_D2 (__global ushort *src1, int src1
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_or_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C2_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
@ -442,7 +472,8 @@ __kernel void arithm_bitwise_or_with_mask_C2_D3 (__global short *src1, int src1_
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_or_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C2_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
@ -471,7 +502,8 @@ __kernel void arithm_bitwise_or_with_mask_C2_D4 (__global int *src1, int src1_
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_or_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C2_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -501,7 +533,8 @@ __kernel void arithm_bitwise_or_with_mask_C2_D5 (__global char *src1, int src1_s
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_or_with_mask_C2_D6 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C2_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -533,8 +566,8 @@ __kernel void arithm_bitwise_or_with_mask_C2_D6 (__global char *src1, int src1_s
#endif
__kernel void arithm_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C3_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
@ -548,6 +581,9 @@ __kernel void arithm_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int src1_
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
@ -596,7 +632,8 @@ __kernel void arithm_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int src1_
}
__kernel void arithm_bitwise_or_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C3_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -610,6 +647,9 @@ __kernel void arithm_bitwise_or_with_mask_C3_D1 (__global char *src1, int src1_s
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
@ -657,7 +697,8 @@ __kernel void arithm_bitwise_or_with_mask_C3_D1 (__global char *src1, int src1_s
}
}
__kernel void arithm_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C3_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
@ -671,6 +712,9 @@ __kernel void arithm_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int src1
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
@ -713,7 +757,8 @@ __kernel void arithm_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int src1
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_bitwise_or_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C3_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
@ -727,6 +772,9 @@ __kernel void arithm_bitwise_or_with_mask_C3_D3 (__global short *src1, int src1_
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
@ -769,7 +817,8 @@ __kernel void arithm_bitwise_or_with_mask_C3_D3 (__global short *src1, int src1_
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_bitwise_or_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C3_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
@ -813,7 +862,8 @@ __kernel void arithm_bitwise_or_with_mask_C3_D4 (__global int *src1, int src1_
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_bitwise_or_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C3_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -858,7 +908,8 @@ __kernel void arithm_bitwise_or_with_mask_C3_D5 (__global char *src1, int src1_s
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_or_with_mask_C3_D6 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C3_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -905,8 +956,8 @@ __kernel void arithm_bitwise_or_with_mask_C3_D6 (__global char *src1, int src1_s
#endif
__kernel void arithm_bitwise_or_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
@ -937,7 +988,8 @@ __kernel void arithm_bitwise_or_with_mask_C4_D0 (__global uchar *src1, int src1_
}
__kernel void arithm_bitwise_or_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C4_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -967,7 +1019,8 @@ __kernel void arithm_bitwise_or_with_mask_C4_D1 (__global char *src1, int src1_s
}
}
__kernel void arithm_bitwise_or_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C4_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
@ -996,7 +1049,8 @@ __kernel void arithm_bitwise_or_with_mask_C4_D2 (__global ushort *src1, int src1
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_or_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C4_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
@ -1025,7 +1079,8 @@ __kernel void arithm_bitwise_or_with_mask_C4_D3 (__global short *src1, int src1_
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_or_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C4_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
@ -1054,7 +1109,8 @@ __kernel void arithm_bitwise_or_with_mask_C4_D4 (__global int *src1, int src1_
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_or_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C4_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -1084,7 +1140,8 @@ __kernel void arithm_bitwise_or_with_mask_C4_D5 (__global char *src1, int src1_s
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_or_with_mask_C4_D6 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C4_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,

View File

@ -43,14 +43,19 @@
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_OR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************and with scalar without mask**************************************/
__kernel void arithm_s_bitwise_or_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C1_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
@ -61,6 +66,9 @@ __kernel void arithm_s_bitwise_or_C1_D0 (__global uchar *src1, int src1_step,
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
@ -84,7 +92,8 @@ __kernel void arithm_s_bitwise_or_C1_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_or_C1_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
@ -95,6 +104,9 @@ __kernel void arithm_s_bitwise_or_C1_D1 (__global char *src1, int src1_step, i
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
@ -117,7 +129,8 @@ __kernel void arithm_s_bitwise_or_C1_D1 (__global char *src1, int src1_step, i
}
}
__kernel void arithm_s_bitwise_or_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C1_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
@ -129,6 +142,9 @@ __kernel void arithm_s_bitwise_or_C1_D2 (__global ushort *src1, int src1_step,
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
@ -148,7 +164,8 @@ __kernel void arithm_s_bitwise_or_C1_D2 (__global ushort *src1, int src1_step,
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C1_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
@ -160,6 +177,9 @@ __kernel void arithm_s_bitwise_or_C1_D3 (__global short *src1, int src1_step,
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
@ -179,7 +199,8 @@ __kernel void arithm_s_bitwise_or_C1_D3 (__global short *src1, int src1_step,
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C1_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C1_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
@ -200,7 +221,8 @@ __kernel void arithm_s_bitwise_or_C1_D4 (__global int *src1, int src1_step, in
*((__global int *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C1_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
@ -222,7 +244,8 @@ __kernel void arithm_s_bitwise_or_C1_D5 (__global char *src1, int src1_step, i
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_or_C1_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C1_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
@ -245,8 +268,8 @@ __kernel void arithm_s_bitwise_or_C1_D6 (__global short *src1, int src1_step, in
}
}
#endif
__kernel void arithm_s_bitwise_or_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
@ -259,6 +282,9 @@ __kernel void arithm_s_bitwise_or_C2_D0 (__global uchar *src1, int src1_step,
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
@ -280,7 +306,8 @@ __kernel void arithm_s_bitwise_or_C2_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_or_C2_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C2_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
@ -293,6 +320,9 @@ __kernel void arithm_s_bitwise_or_C2_D1 (__global char *src1, int src1_step, i
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
@ -313,7 +343,8 @@ __kernel void arithm_s_bitwise_or_C2_D1 (__global char *src1, int src1_step, i
}
}
__kernel void arithm_s_bitwise_or_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
@ -335,7 +366,8 @@ __kernel void arithm_s_bitwise_or_C2_D2 (__global ushort *src1, int src1_step,
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C2_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C2_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
@ -378,7 +410,8 @@ __kernel void arithm_s_bitwise_or_C2_D4 (__global int *src1, int src1_step, in
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C2_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C2_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
@ -400,7 +433,8 @@ __kernel void arithm_s_bitwise_or_C2_D5 (__global char *src1, int src1_step, i
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_or_C2_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C2_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
@ -423,7 +457,8 @@ __kernel void arithm_s_bitwise_or_C2_D6 (__global short *src1, int src1_step, in
}
}
#endif
__kernel void arithm_s_bitwise_or_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C3_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
@ -436,6 +471,9 @@ __kernel void arithm_s_bitwise_or_C3_D0 (__global uchar *src1, int src1_step,
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
@ -480,7 +518,8 @@ __kernel void arithm_s_bitwise_or_C3_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_or_C3_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C3_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
@ -493,6 +532,9 @@ __kernel void arithm_s_bitwise_or_C3_D1 (__global char *src1, int src1_step, i
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
@ -536,7 +578,8 @@ __kernel void arithm_s_bitwise_or_C3_D1 (__global char *src1, int src1_step, i
}
}
__kernel void arithm_s_bitwise_or_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C3_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
@ -549,6 +592,9 @@ __kernel void arithm_s_bitwise_or_C3_D2 (__global ushort *src1, int src1_step,
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
@ -587,7 +633,8 @@ __kernel void arithm_s_bitwise_or_C3_D2 (__global ushort *src1, int src1_step,
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_or_C3_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C3_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
@ -600,6 +647,9 @@ __kernel void arithm_s_bitwise_or_C3_D3 (__global short *src1, int src1_step,
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
@ -638,7 +688,8 @@ __kernel void arithm_s_bitwise_or_C3_D3 (__global short *src1, int src1_step,
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_or_C3_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C3_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
@ -673,7 +724,8 @@ __kernel void arithm_s_bitwise_or_C3_D4 (__global int *src1, int src1_step, in
*((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
__kernel void arithm_s_bitwise_or_C3_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C3_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
@ -706,7 +758,8 @@ __kernel void arithm_s_bitwise_or_C3_D5 (__global char *src1, int src1_step, i
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_or_C3_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C3_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
@ -742,7 +795,8 @@ __kernel void arithm_s_bitwise_or_C3_D6 (__global short *src1, int src1_step, in
}
}
#endif
__kernel void arithm_s_bitwise_or_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
@ -765,7 +819,8 @@ __kernel void arithm_s_bitwise_or_C4_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_or_C4_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C4_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
@ -787,7 +842,8 @@ __kernel void arithm_s_bitwise_or_C4_D1 (__global char *src1, int src1_step, i
}
}
__kernel void arithm_s_bitwise_or_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C4_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
@ -808,7 +864,8 @@ __kernel void arithm_s_bitwise_or_C4_D2 (__global ushort *src1, int src1_step,
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C4_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C4_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
@ -829,7 +886,8 @@ __kernel void arithm_s_bitwise_or_C4_D3 (__global short *src1, int src1_step,
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C4_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C4_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
@ -850,7 +908,8 @@ __kernel void arithm_s_bitwise_or_C4_D4 (__global int *src1, int src1_step, in
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C4_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C4_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
@ -874,7 +933,8 @@ __kernel void arithm_s_bitwise_or_C4_D5 (__global char *src1, int src1_step, i
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_or_C4_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C4_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)

View File

@ -43,14 +43,18 @@
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_OR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_or with scalar with mask**************************************/
__kernel void arithm_s_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C1_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
@ -64,6 +68,9 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int s
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -89,7 +96,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int s
}
__kernel void arithm_s_bitwise_or_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
@ -103,6 +111,9 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D1 (__global char *src1, int sr
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -127,7 +138,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D1 (__global char *src1, int sr
}
}
__kernel void arithm_s_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C1_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
@ -141,6 +153,9 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -162,7 +177,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
@ -176,6 +192,9 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D3 (__global short *src1, int s
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -197,7 +216,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D3 (__global short *src1, int s
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C1_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
@ -226,7 +246,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D4 (__global int *src1, int s
}
}
__kernel void arithm_s_bitwise_or_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
@ -254,9 +275,9 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D5 (__global char *src1, int
*((__global char4 *)((__global char *)dst + dst_index)) = data;
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_or_with_mask_C1_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C1_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
@ -285,7 +306,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D6 (__global short *src1, int src
}
}
#endif
__kernel void arithm_s_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
@ -299,6 +321,9 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int s
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -322,7 +347,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int s
}
__kernel void arithm_s_bitwise_or_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C2_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
@ -336,6 +362,9 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D1 (__global char *src1, int sr
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -358,7 +387,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D1 (__global char *src1, int sr
}
}
__kernel void arithm_s_bitwise_or_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
@ -386,7 +416,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D2 (__global ushort *src1, int
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C2_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
@ -414,7 +445,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D3 (__global short *src1, int s
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C2_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
@ -442,7 +474,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D4 (__global int *src1, int src
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C2_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
@ -470,7 +503,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D5 (__global char *src1, int sr
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_or_with_mask_C2_D6 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C2_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
@ -499,7 +533,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D6 (__global char *src1, int sr
}
}
#endif
__kernel void arithm_s_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C3_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
@ -513,6 +548,9 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int s
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -560,7 +598,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int s
}
__kernel void arithm_s_bitwise_or_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C3_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
@ -574,6 +613,9 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D1 (__global char *src1, int sr
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -620,7 +662,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D1 (__global char *src1, int sr
}
}
__kernel void arithm_s_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C3_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
@ -634,6 +677,9 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -675,7 +721,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C3_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
@ -689,6 +736,9 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D3 (__global short *src1, int s
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -730,7 +780,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D3 (__global short *src1, int s
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C3_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
@ -773,7 +824,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D4 (__global int *src1, int src
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C3_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
@ -818,7 +870,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D5 (__global char *src1, int sr
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_or_with_mask_C3_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C3_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
@ -861,7 +914,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D6 (__global short *src1, int src
}
}
#endif
__kernel void arithm_s_bitwise_or_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
@ -890,7 +944,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D0 (__global uchar *src1, int s
}
__kernel void arithm_s_bitwise_or_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C4_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
@ -918,7 +973,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D1 (__global char *src1, int sr
}
}
__kernel void arithm_s_bitwise_or_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C4_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
@ -945,7 +1001,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D2 (__global ushort *src1, int
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C4_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
@ -972,7 +1029,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D3 (__global short *src1, int s
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C4_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
@ -999,7 +1057,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D4 (__global int *src1, int src
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C4_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
@ -1029,7 +1088,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D5 (__global char *src1, int sr
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_or_with_mask_C4_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C4_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)

View File

@ -43,9 +43,12 @@
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_XOR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
@ -62,6 +65,9 @@ __kernel void arithm_bitwise_xor_D0 (__global uchar *src1, int src1_step, int sr
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@ -112,6 +118,9 @@ __kernel void arithm_bitwise_xor_D1 (__global char *src1, int src1_step, int src
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@ -163,6 +172,9 @@ __kernel void arithm_bitwise_xor_D2 (__global ushort *src1, int src1_step, int s
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -215,6 +227,9 @@ __kernel void arithm_bitwise_xor_D3 (__global short *src1, int src1_step, int sr
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -301,7 +316,6 @@ __kernel void arithm_bitwise_xor_D5 (__global char *src1, int src1_step, int src
*((__global char4 *)((__global char *)dst + dst_index)) = tmp;
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_xor_D6 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,

View File

@ -43,14 +43,18 @@
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_XOR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_xor with mask**************************************/
__kernel void arithm_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C1_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
@ -64,6 +68,9 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int src1
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@ -91,7 +98,8 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int src1
__kernel void arithm_bitwise_xor_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -105,6 +113,9 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D1 (__global char *src1, int src1_
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@ -132,7 +143,8 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D1 (__global char *src1, int src1_
__kernel void arithm_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C1_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
@ -146,6 +158,9 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int src
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -171,7 +186,8 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int src
__kernel void arithm_bitwise_xor_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
@ -185,6 +201,9 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D3 (__global short *src1, int src1
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -210,7 +229,8 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D3 (__global short *src1, int src1
__kernel void arithm_bitwise_xor_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C1_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
@ -242,7 +262,8 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D4 (__global int *src1, int src1
__kernel void arithm_bitwise_xor_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -273,9 +294,9 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D5 (__global char *src1, int src1_
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_xor_with_mask_C1_D6 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C1_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -308,8 +329,8 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D6 (__global char *src1, int src1_
__kernel void arithm_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
@ -323,6 +344,9 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int src1
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -347,7 +371,8 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int src1
}
__kernel void arithm_bitwise_xor_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C2_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -361,6 +386,9 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D1 (__global char *src1, int src1_
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -384,7 +412,8 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D1 (__global char *src1, int src1_
}
}
__kernel void arithm_bitwise_xor_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
@ -413,7 +442,8 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D2 (__global ushort *src1, int src
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_xor_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C2_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
@ -442,7 +472,8 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D3 (__global short *src1, int src1
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_xor_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C2_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
@ -471,7 +502,8 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D4 (__global int *src1, int src1
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_xor_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C2_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -501,7 +533,8 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D5 (__global char *src1, int src1_
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_xor_with_mask_C2_D6 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C2_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -533,8 +566,8 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D6 (__global char *src1, int src1_
#endif
__kernel void arithm_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C3_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
@ -548,6 +581,9 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int src1
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
@ -596,7 +632,8 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int src1
}
__kernel void arithm_bitwise_xor_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C3_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -610,6 +647,9 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D1 (__global char *src1, int src1_
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
@ -657,7 +697,8 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D1 (__global char *src1, int src1_
}
}
__kernel void arithm_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C3_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
@ -671,6 +712,9 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int src
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
@ -713,7 +757,8 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int src
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_bitwise_xor_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C3_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
@ -727,6 +772,9 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D3 (__global short *src1, int src1
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
@ -769,7 +817,8 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D3 (__global short *src1, int src1
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_bitwise_xor_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C3_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
@ -813,7 +862,8 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D4 (__global int *src1, int src1
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_bitwise_xor_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C3_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -858,7 +908,8 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D5 (__global char *src1, int src1_
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_xor_with_mask_C3_D6 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C3_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -905,8 +956,8 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D6 (__global char *src1, int src1_
#endif
__kernel void arithm_bitwise_xor_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
@ -937,7 +988,8 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D0 (__global uchar *src1, int src1
}
__kernel void arithm_bitwise_xor_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C4_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -967,7 +1019,8 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D1 (__global char *src1, int src1_
}
}
__kernel void arithm_bitwise_xor_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C4_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
@ -996,7 +1049,8 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D2 (__global ushort *src1, int src
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_xor_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C4_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
@ -1025,7 +1079,8 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D3 (__global short *src1, int src1
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_xor_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C4_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
@ -1054,7 +1109,8 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D4 (__global int *src1, int src1
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_xor_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C4_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
@ -1084,7 +1140,8 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D5 (__global char *src1, int src1_
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_xor_with_mask_C4_D6 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_xor_with_mask_C4_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,

View File

@ -42,17 +42,19 @@
// the use of this software, even if advised of the possibility of such damage.
//
//
#if defined (__ATI__)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (__NVIDIA__)
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_XOR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************xor with scalar without mask**************************************/
__kernel void arithm_s_bitwise_xor_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C1_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
@ -63,6 +65,9 @@ __kernel void arithm_s_bitwise_xor_C1_D0 (__global uchar *src1, int src1_step,
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
@ -86,7 +91,8 @@ __kernel void arithm_s_bitwise_xor_C1_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_xor_C1_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
@ -97,6 +103,9 @@ __kernel void arithm_s_bitwise_xor_C1_D1 (__global char *src1, int src1_step,
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
@ -119,7 +128,8 @@ __kernel void arithm_s_bitwise_xor_C1_D1 (__global char *src1, int src1_step,
}
}
__kernel void arithm_s_bitwise_xor_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C1_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
@ -131,6 +141,9 @@ __kernel void arithm_s_bitwise_xor_C1_D2 (__global ushort *src1, int src1_step
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
@ -150,7 +163,8 @@ __kernel void arithm_s_bitwise_xor_C1_D2 (__global ushort *src1, int src1_step
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C1_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
@ -162,6 +176,9 @@ __kernel void arithm_s_bitwise_xor_C1_D3 (__global short *src1, int src1_step,
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
@ -181,7 +198,8 @@ __kernel void arithm_s_bitwise_xor_C1_D3 (__global short *src1, int src1_step,
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C1_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C1_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
@ -202,7 +220,8 @@ __kernel void arithm_s_bitwise_xor_C1_D4 (__global int *src1, int src1_step, i
*((__global int *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C1_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
@ -234,7 +253,8 @@ __kernel void arithm_s_bitwise_xor_C1_D5 (__global char *src1, int src1_step,
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_xor_C1_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C1_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
@ -256,7 +276,8 @@ __kernel void arithm_s_bitwise_xor_C1_D6 (__global short *src1, int src1_step, i
}
}
#endif
__kernel void arithm_s_bitwise_xor_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
@ -268,6 +289,9 @@ __kernel void arithm_s_bitwise_xor_C2_D0 (__global uchar *src1, int src1_step,
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
@ -290,7 +314,8 @@ __kernel void arithm_s_bitwise_xor_C2_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_xor_C2_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C2_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
@ -302,6 +327,9 @@ __kernel void arithm_s_bitwise_xor_C2_D1 (__global char *src1, int src1_step,
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
@ -322,7 +350,8 @@ __kernel void arithm_s_bitwise_xor_C2_D1 (__global char *src1, int src1_step,
}
}
__kernel void arithm_s_bitwise_xor_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
@ -343,7 +372,8 @@ __kernel void arithm_s_bitwise_xor_C2_D2 (__global ushort *src1, int src1_step
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C2_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C2_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
@ -364,7 +394,8 @@ __kernel void arithm_s_bitwise_xor_C2_D3 (__global short *src1, int src1_step,
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C2_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C2_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
@ -384,7 +415,8 @@ __kernel void arithm_s_bitwise_xor_C2_D4 (__global int *src1, int src1_step, i
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C2_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C2_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
@ -406,7 +438,8 @@ __kernel void arithm_s_bitwise_xor_C2_D5 (__global char *src1, int src1_step,
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_xor_C2_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C2_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
@ -428,7 +461,8 @@ __kernel void arithm_s_bitwise_xor_C2_D6 (__global short *src1, int src1_step, i
}
}
#endif
__kernel void arithm_s_bitwise_xor_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C3_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
@ -440,6 +474,9 @@ __kernel void arithm_s_bitwise_xor_C3_D0 (__global uchar *src1, int src1_step,
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
@ -484,7 +521,8 @@ __kernel void arithm_s_bitwise_xor_C3_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_xor_C3_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C3_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
@ -496,6 +534,9 @@ __kernel void arithm_s_bitwise_xor_C3_D1 (__global char *src1, int src1_step,
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
@ -539,7 +580,8 @@ __kernel void arithm_s_bitwise_xor_C3_D1 (__global char *src1, int src1_step,
}
}
__kernel void arithm_s_bitwise_xor_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C3_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
@ -551,6 +593,9 @@ __kernel void arithm_s_bitwise_xor_C3_D2 (__global ushort *src1, int src1_step
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
@ -589,7 +634,8 @@ __kernel void arithm_s_bitwise_xor_C3_D2 (__global ushort *src1, int src1_step
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_xor_C3_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C3_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
@ -601,6 +647,9 @@ __kernel void arithm_s_bitwise_xor_C3_D3 (__global short *src1, int src1_step,
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
@ -639,7 +688,8 @@ __kernel void arithm_s_bitwise_xor_C3_D3 (__global short *src1, int src1_step,
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_xor_C3_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C3_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
@ -673,7 +723,8 @@ __kernel void arithm_s_bitwise_xor_C3_D4 (__global int *src1, int src1_step, i
*((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
__kernel void arithm_s_bitwise_xor_C3_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C3_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
@ -708,7 +759,8 @@ __kernel void arithm_s_bitwise_xor_C3_D5 (__global char *src1, int src1_step,
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_xor_C3_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C3_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{
@ -743,7 +795,8 @@ __kernel void arithm_s_bitwise_xor_C3_D6 (__global short *src1, int src1_step, i
}
}
#endif
__kernel void arithm_s_bitwise_xor_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
{
@ -765,7 +818,8 @@ __kernel void arithm_s_bitwise_xor_C4_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_xor_C4_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C4_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
@ -786,7 +840,8 @@ __kernel void arithm_s_bitwise_xor_C4_D1 (__global char *src1, int src1_step,
}
}
__kernel void arithm_s_bitwise_xor_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C4_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
{
@ -806,7 +861,8 @@ __kernel void arithm_s_bitwise_xor_C4_D2 (__global ushort *src1, int src1_step
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C4_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C4_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
@ -826,7 +882,8 @@ __kernel void arithm_s_bitwise_xor_C4_D3 (__global short *src1, int src1_step,
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C4_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C4_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
{
@ -846,7 +903,8 @@ __kernel void arithm_s_bitwise_xor_C4_D4 (__global int *src1, int src1_step, i
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_C4_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C4_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
@ -869,7 +927,8 @@ __kernel void arithm_s_bitwise_xor_C4_D5 (__global char *src1, int src1_step,
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_xor_C4_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_C4_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
{

View File

@ -42,17 +42,20 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if defined (__ATI__)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (__NVIDIA__)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_XOR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_xor with scalar with mask**************************************/
__kernel void arithm_s_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C1_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
@ -65,6 +68,9 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -90,7 +96,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int
}
__kernel void arithm_s_bitwise_xor_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
@ -103,6 +110,9 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D1 (__global char *src1, int s
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -127,7 +137,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D1 (__global char *src1, int s
}
}
__kernel void arithm_s_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C1_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
@ -140,6 +151,9 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -161,7 +175,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
@ -174,6 +189,9 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D3 (__global short *src1, int
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -195,7 +213,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D3 (__global short *src1, int
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C1_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
@ -223,7 +242,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D4 (__global int *src1, int
}
}
__kernel void arithm_s_bitwise_xor_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
@ -252,7 +272,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D5 (__global char *src1, int src
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_xor_with_mask_C1_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C1_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
@ -280,7 +301,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D6 (__global short *src1, int sr
}
}
#endif
__kernel void arithm_s_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
@ -293,6 +315,9 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -316,7 +341,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int
}
__kernel void arithm_s_bitwise_xor_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C2_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
@ -329,6 +355,9 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D1 (__global char *src1, int s
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -351,7 +380,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D1 (__global char *src1, int s
}
}
__kernel void arithm_s_bitwise_xor_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
@ -378,7 +408,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D2 (__global ushort *src1, int
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C2_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
@ -405,7 +436,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D3 (__global short *src1, int
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C2_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
@ -432,7 +464,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D4 (__global int *src1, int sr
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C2_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
@ -461,7 +494,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D5 (__global char *src1, int s
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_xor_with_mask_C2_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C2_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
@ -489,7 +523,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D6 (__global short *src1, int sr
}
}
#endif
__kernel void arithm_s_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C3_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
@ -502,6 +537,9 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -549,7 +587,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int
}
__kernel void arithm_s_bitwise_xor_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C3_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
@ -562,6 +601,9 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D1 (__global char *src1, int s
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -608,7 +650,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D1 (__global char *src1, int s
}
}
__kernel void arithm_s_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C3_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
@ -621,6 +664,9 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -662,7 +708,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_xor_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C3_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
@ -675,6 +722,9 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D3 (__global short *src1, int
{
x = x << 1;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@ -716,7 +766,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D3 (__global short *src1, int
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_xor_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C3_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
@ -758,7 +809,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D4 (__global int *src1, int sr
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_xor_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C3_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
@ -801,7 +853,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D5 (__global char *src1, int s
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_xor_with_mask_C3_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C3_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
@ -844,7 +897,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D6 (__global short *src1, int sr
}
}
#endif
__kernel void arithm_s_bitwise_xor_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
@ -872,7 +926,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D0 (__global uchar *src1, int
}
__kernel void arithm_s_bitwise_xor_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C4_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
@ -899,7 +954,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D1 (__global char *src1, int s
}
}
__kernel void arithm_s_bitwise_xor_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C4_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
@ -925,7 +981,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D2 (__global ushort *src1, int
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C4_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
@ -951,7 +1008,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D3 (__global short *src1, int
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C4_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
@ -977,7 +1035,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D4 (__global int *src1, int sr
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_xor_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C4_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
@ -1006,7 +1065,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D5 (__global char *src1, int s
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_xor_with_mask_C4_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_xor_with_mask_C4_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)

View File

@ -43,7 +43,11 @@
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
@ -62,6 +66,9 @@ __kernel void arithm_compare_eq_D0 (__global uchar *src1, int src1_step, int src
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@ -114,6 +121,9 @@ __kernel void arithm_compare_ne_D2 (__global ushort *src1, int src1_step, int sr
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1)& 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -165,6 +175,9 @@ __kernel void arithm_compare_eq_D3 (__global short *src1, int src1_step, int src
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -217,6 +230,9 @@ __kernel void arithm_compare_eq_D4 (__global int *src1, int src1_step, int src1_
if (x < cols && y < rows)
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
@ -265,6 +281,9 @@ __kernel void arithm_compare_eq_D5 (__global float *src1, int src1_step, int src
if (x < cols && y < rows)
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
@ -275,7 +294,8 @@ __kernel void arithm_compare_eq_D5 (__global float *src1, int src1_step, int src
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix));
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix)); if(src2_index < 0)
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix));
if(src2_index < 0)
{
float4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
@ -307,6 +327,9 @@ __kernel void arithm_compare_eq_D6 (__global double *src1, int src1_step, int sr
if (x < cols && y < rows)
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 3) & 3)
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
@ -358,6 +381,9 @@ __kernel void arithm_compare_gt_D0 (__global uchar *src1, int src1_step, int src
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@ -409,6 +435,9 @@ __kernel void arithm_compare_gt_D2 (__global ushort *src1, int src1_step, int sr
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -462,6 +491,9 @@ __kernel void arithm_compare_gt_D3 (__global short *src1, int src1_step, int src
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -511,6 +543,9 @@ __kernel void arithm_compare_gt_D4 (__global int *src1, int src1_step, int src1_
if (x < cols && y < rows)
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
@ -560,6 +595,9 @@ __kernel void arithm_compare_gt_D5 (__global float *src1, int src1_step, int src
if (x < cols && y < rows)
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
@ -609,6 +647,9 @@ __kernel void arithm_compare_gt_D6 (__global double *src1, int src1_step, int sr
if (x < cols && y < rows)
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 3) & 3)
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
@ -660,6 +701,9 @@ __kernel void arithm_compare_ge_D0 (__global uchar *src1, int src1_step, int src
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@ -714,6 +758,9 @@ __kernel void arithm_compare_ge_D2 (__global ushort *src1, int src1_step, int sr
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -769,6 +816,9 @@ __kernel void arithm_compare_ge_D3 (__global short *src1, int src1_step, int src
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1)& 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -820,6 +870,9 @@ __kernel void arithm_compare_ge_D4 (__global int *src1, int src1_step, int src1_
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2)& 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
@ -869,6 +922,9 @@ __kernel void arithm_compare_ge_D5 (__global float *src1, int src1_step, int src
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2)& 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
@ -920,6 +976,9 @@ __kernel void arithm_compare_ge_D6 (__global double *src1, int src1_step, int sr
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 3)& 3)
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
@ -942,7 +1001,8 @@ __kernel void arithm_compare_ge_D6 (__global double *src1, int src1_step, int sr
double4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
} uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
}
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
uchar4 tmp_data = convert_uchar4((src1_data >= src2_data));
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
@ -954,3 +1014,4 @@ __kernel void arithm_compare_ge_D6 (__global double *src1, int src1_step, int sr
}
}
#endif

View File

@ -43,7 +43,11 @@
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
/***********************************Compare NE*******************************/
__kernel void arithm_compare_ne_D0 (__global uchar *src1, int src1_step, int src1_offset,
@ -58,6 +62,9 @@ __kernel void arithm_compare_ne_D0 (__global uchar *src1, int src1_step, int src
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@ -110,6 +117,9 @@ __kernel void arithm_compare_ne_D2 (__global ushort *src1, int src1_step, int sr
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1)& 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -162,6 +172,9 @@ __kernel void arithm_compare_ne_D3 (__global short *src1, int src1_step, int src
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1)& 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -210,6 +223,9 @@ __kernel void arithm_compare_ne_D4 (__global int *src1, int src1_step, int src1_
if (x < cols && y < rows)
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2)& 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
@ -259,6 +275,9 @@ __kernel void arithm_compare_ne_D5 (__global float *src1, int src1_step, int src
if (x < cols && y < rows)
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
@ -269,7 +288,8 @@ __kernel void arithm_compare_ne_D5 (__global float *src1, int src1_step, int src
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix));
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix)); if(src1_index < 0)
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix));
if(src1_index < 0)
{
float4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
@ -306,6 +326,9 @@ __kernel void arithm_compare_ne_D6 (__global double *src1, int src1_step, int sr
if (x < cols && y < rows)
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 3) & 3)
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
@ -358,6 +381,9 @@ __kernel void arithm_compare_lt_D0 (__global uchar *src1, int src1_step, int src
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@ -410,6 +436,9 @@ __kernel void arithm_compare_lt_D2 (__global ushort *src1, int src1_step, int sr
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -463,6 +492,9 @@ __kernel void arithm_compare_lt_D3 (__global short *src1, int src1_step, int src
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -512,6 +544,9 @@ __kernel void arithm_compare_lt_D4 (__global int *src1, int src1_step, int src1_
if (x < cols && y < rows)
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
@ -564,6 +599,9 @@ __kernel void arithm_compare_lt_D5 (__global float *src1, int src1_step, int src
if (x < cols && y < rows)
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
@ -613,6 +651,9 @@ __kernel void arithm_compare_lt_D6 (__global double *src1, int src1_step, int sr
if (x < cols && y < rows)
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 3) & 3)
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
@ -664,6 +705,9 @@ __kernel void arithm_compare_le_D0 (__global uchar *src1, int src1_step, int src
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@ -717,6 +761,9 @@ __kernel void arithm_compare_le_D2 (__global ushort *src1, int src1_step, int sr
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -770,6 +817,9 @@ __kernel void arithm_compare_le_D3 (__global short *src1, int src1_step, int src
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -819,6 +869,9 @@ __kernel void arithm_compare_le_D4 (__global int *src1, int src1_step, int src1_
if (x < cols && y < rows)
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2)& 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
@ -867,6 +920,9 @@ __kernel void arithm_compare_le_D5 (__global float *src1, int src1_step, int src
if (x < cols && y < rows)
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 2)& 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
@ -915,6 +971,9 @@ __kernel void arithm_compare_le_D6 (__global double *src1, int src1_step, int sr
if (x < cols && y < rows)
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 3)& 3)
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
@ -952,3 +1011,5 @@ __kernel void arithm_compare_le_D6 (__global double *src1, int src1_step, int sr
}
}
#endif

View File

@ -44,7 +44,11 @@
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
typedef double F ;
typedef double4 F4;
#define convert_F4 convert_double4
@ -56,34 +60,24 @@ typedef float4 F4;
#define convert_F float
#endif
uchar round2_uchar(F v){
uchar v1 = convert_uchar_sat(round(v));
//uchar v2 = convert_uchar_sat(v+(v>=0 ? 0.5 : -0.5));
return v1;//(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
inline uchar round2_uchar(F v)
{
return convert_uchar_sat(round(v));
}
ushort round2_ushort(F v){
ushort v1 = convert_ushort_sat(round(v));
//ushort v2 = convert_ushort_sat(v+(v>=0 ? 0.5 : -0.5));
return v1;//(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
inline ushort round2_ushort(F v)
{
return convert_ushort_sat(round(v));
}
short round2_short(F v){
short v1 = convert_short_sat(round(v));
//short v2 = convert_short_sat(v+(v>=0 ? 0.5 : -0.5));
return v1;//(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
inline short round2_short(F v)
{
return convert_short_sat(round(v));
}
int round2_int(F v){
int v1 = convert_int_sat(round(v));
//int v2 = convert_int_sat(v+(v>=0 ? 0.5 : -0.5));
return v1;//(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
inline int round2_int(F v)
{
return convert_int_sat(round(v));
}
///////////////////////////////////////////////////////////////////////////////////////
////////////////////////////divide///////////////////////////////////////////////////
@ -94,39 +88,41 @@ __kernel void arithm_div_D0 (__global uchar *src1, int src1_step, int src1_offse
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1, F scalar)
{
int x = get_global_id(0);
int y = get_global_id(1);
int2 coor = (int2)(get_global_id(0), get_global_id(1));
if (x < cols && y < rows)
if (coor.x < cols && coor.y < rows)
{
x = x << 2;
coor.x = coor.x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int2 src_index = (int2)(mad24(coor.y, src1_step, coor.x + src1_offset - dst_align),
mad24(coor.y, src2_step, coor.x + src2_offset - dst_align));
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
int4 dst_args = (int4)(mad24(coor.y, dst_step, dst_offset),
mad24(coor.y, dst_step, dst_offset + dst_step1),
mad24(coor.y, dst_step, dst_offset + coor.x & (int)0xfffffffc),
0);
uchar4 src1_data = vload4(0, src1 + src1_index);
uchar4 src2_data = vload4(0, src2 + src2_index);
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
uchar4 src1_data = vload4(0, src1 + src_index.x);
uchar4 src2_data = vload4(0, src2 + src_index.y);
uchar4 dst_data = *((__global uchar4 *)(dst + dst_args.z));
F4 tmp = convert_F4(src1_data) * scalar;
uchar4 tmp_data;
tmp_data.x = ((tmp.x == 0) || (src2_data.x == 0)) ? 0 : round2_uchar(tmp.x / (F)src2_data.x);
tmp_data.y = ((tmp.y == 0) || (src2_data.y == 0)) ? 0 : round2_uchar(tmp.y / (F)src2_data.y);
tmp_data.z = ((tmp.z == 0) || (src2_data.z == 0)) ? 0 : round2_uchar(tmp.z / (F)src2_data.z);
tmp_data.w = ((tmp.w == 0) || (src2_data.w == 0)) ? 0 : round2_uchar(tmp.w / (F)src2_data.w);
tmp_data.x = ((tmp.x == 0) || (src2_data.x == 0)) ? 0 : round2_uchar(tmp.x / src2_data.x);
tmp_data.y = ((tmp.y == 0) || (src2_data.y == 0)) ? 0 : round2_uchar(tmp.y / src2_data.y);
tmp_data.z = ((tmp.z == 0) || (src2_data.z == 0)) ? 0 : round2_uchar(tmp.z / src2_data.z);
tmp_data.w = ((tmp.w == 0) || (src2_data.w == 0)) ? 0 : round2_uchar(tmp.w / src2_data.w);
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y;
dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z;
dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w;
dst_data.x = ((dst_args.z + 0 >= dst_args.x) && (dst_args.z + 0 < dst_args.y)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_args.z + 1 >= dst_args.x) && (dst_args.z + 1 < dst_args.y)) ? tmp_data.y : dst_data.y;
dst_data.z = ((dst_args.z + 2 >= dst_args.x) && (dst_args.z + 2 < dst_args.y)) ? tmp_data.z : dst_data.z;
dst_data.w = ((dst_args.z + 3 >= dst_args.x) && (dst_args.z + 3 < dst_args.y)) ? tmp_data.w : dst_data.w;
*((__global uchar4 *)(dst + dst_index)) = dst_data;
*((__global uchar4 *)(dst + dst_args.z)) = dst_data;
}
}
@ -142,6 +138,9 @@ __kernel void arithm_div_D2 (__global ushort *src1, int src1_step, int src1_offs
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -182,6 +181,9 @@ __kernel void arithm_div_D3 (__global short *src1, int src1_step, int src1_offse
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -297,6 +299,9 @@ __kernel void arithm_s_div_D0 (__global uchar *src, int src_step, int src_offset
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src_index = mad24(y, src_step, x + src_offset - dst_align);
@ -333,6 +338,9 @@ __kernel void arithm_s_div_D2 (__global ushort *src, int src_step, int src_offse
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src_index = mad24(y, src_step, (x << 1) + src_offset - (dst_align << 1));
@ -368,6 +376,9 @@ __kernel void arithm_s_div_D3 (__global short *src, int src_step, int src_offset
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src_index = mad24(y, src_step, (x << 1) + src_offset - (dst_align << 1));
@ -455,3 +466,5 @@ __kernel void arithm_s_div_D6 (__global double *src, int src_step, int src_offse
}
}
#endif

View File

@ -44,7 +44,11 @@
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
@ -61,6 +65,9 @@ __kernel void arithm_flip_rows_D0 (__global uchar *src, int src_step, int src_of
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src_index_0 = mad24(y, src_step, x + src_offset - dst_align);
int src_index_1 = mad24(rows - y - 1, src_step, x + src_offset - dst_align);
@ -116,6 +123,9 @@ __kernel void arithm_flip_rows_D1 (__global char *src, int src_step, int src_off
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src_index_0 = mad24(y, src_step, x + src_offset - dst_align);
int src_index_1 = mad24(rows - y - 1, src_step, x + src_offset - dst_align);
@ -158,6 +168,9 @@ __kernel void arithm_flip_rows_D2 (__global ushort *src, int src_step, int src_o
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset >> 1) & 3) << 1)
int src_index_0 = mad24(y, src_step, (x << 1) + src_offset - dst_align);
int src_index_1 = mad24(rows - y - 1, src_step, (x << 1) + src_offset - dst_align);
@ -200,6 +213,9 @@ __kernel void arithm_flip_rows_D3 (__global short *src, int src_step, int src_of
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset >> 1) & 3) << 1)
int src_index_0 = mad24(y, src_step, (x << 1) + src_offset - dst_align);
int src_index_1 = mad24(rows - y - 1, src_step, (x << 1) + src_offset - dst_align);

View File

@ -16,7 +16,6 @@
//
// @Authors
// Jia Haipeng, jiahaipeng95@gmail.com
// Dachuan Zhao, dachuan@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
@ -44,11 +43,16 @@
//
//M*/
#if defined DOUBLE_SUPPORT
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
int4 round_int4(float4 v){
int4 round_int4(float4 v)
{
v.s0 = v.s0 + (v.s0 > 0 ? 0.5 : -0.5);
v.s1 = v.s1 + (v.s1 > 0 ? 0.5 : -0.5);
v.s2 = v.s2 + (v.s2 > 0 ? 0.5 : -0.5);
@ -56,7 +60,8 @@ int4 round_int4(float4 v){
return convert_int4_sat(v);
}
uint4 round_uint4(float4 v){
uint4 round_uint4(float4 v)
{
v.s0 = v.s0 + (v.s0 > 0 ? 0.5 : -0.5);
v.s1 = v.s1 + (v.s1 > 0 ? 0.5 : -0.5);
v.s2 = v.s2 + (v.s2 > 0 ? 0.5 : -0.5);
@ -64,7 +69,8 @@ uint4 round_uint4(float4 v){
return convert_uint4_sat(v);
}
long round_int(float v){
long round_int(float v)
{
v = v + (v > 0 ? 0.5 : -0.5);
return convert_int_sat(v);
@ -85,6 +91,9 @@ __kernel void arithm_mul_D0 (__global uchar *src1, int src1_step, int src1_offse
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
@ -130,6 +139,9 @@ __kernel void arithm_mul_D2 (__global ushort *src1, int src1_step, int src1_offs
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
@ -166,6 +178,9 @@ __kernel void arithm_mul_D3 (__global short *src1, int src1_step, int src1_offse
{
x = x << 2;
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));