Process multiple rows per work-item in the KF kernel

This commit is contained in:
Ilya Lavrenov
2014-05-14 15:42:30 +04:00
parent 5ee398bfd6
commit 15738bf7ef
6 changed files with 124 additions and 88 deletions

View File

@@ -145,6 +145,7 @@
// Hook macros defined with empty expansions here. Within this view they are used by the KF
// kernels: EXTRA_PARAMS is appended to the parameter list, EXTRA_INDEX is expanded once
// before the row loop, and EXTRA_INDEX_ADD is expanded once per row-loop iteration.
#define EXTRA_PARAMS
#define EXTRA_INDEX
#define EXTRA_INDEX_ADD
// NOTE(review): the matching #endif / #elif branches of this conditional are outside this view.
#if defined OP_ADD
// Per-element operation for OP_ADD: sum the two source elements, convert, and store.
#define PROCESS_ELEM storedst(convertToDT(srcelem1 + srcelem2))
@@ -363,7 +364,9 @@
// Redefine the hook macros for a variant that takes a second destination buffer
// (dstptr2 / dststep2 / dstoffset2). The #if that this block belongs to is outside this view.
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , __global uchar* dstptr2, int dststep2, int dstoffset2
#undef EXTRA_INDEX
// NOTE(review): EXTRA_INDEX is defined twice in a row below. This text comes from a commit
// diff shown without +/- markers, so the y-based line is presumably the superseded one and
// the y0-based line its replacement -- confirm against the actual file.
#define EXTRA_INDEX int dst_index2 = mad24(y, dststep2, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset2))
#define EXTRA_INDEX int dst_index2 = mad24(y0, dststep2, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset2))
#undef EXTRA_INDEX_ADD
// Advances the second-destination index by one row step (dststep2).
#define EXTRA_INDEX_ADD dst_index2 += dststep2
#endif
#if defined UNARY_OP || defined MASK_UNARY_OP
@@ -393,18 +396,25 @@ __kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
int rows, int cols EXTRA_PARAMS )
{
// NOTE(review): this body comes from a commit diff shown without +/- markers, so lines
// indexed by y (presumably superseded) appear next to their y0-based replacements.
int x = get_global_id(0);
int y = get_global_id(1);
// First row handled by this work-item: the dimension-1 global id scaled by rowsPerWI.
int y0 = get_global_id(1) * rowsPerWI;
if (x < cols && y < rows)
if (x < cols)
{
// Starting indices for column x in row y0:
// mad24(row, step, mad24(x, element size * cn, offset)) for each buffer.
int src1_index = mad24(y, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
int src1_index = mad24(y0, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
// The second-source index is compiled out for OP_RECIP_SCALE and OP_NOT.
#if !(defined(OP_RECIP_SCALE) || defined(OP_NOT))
int src2_index = mad24(y, srcstep2, mad24(x, (int)sizeof(srcT2_C1) * cn, srcoffset2));
int src2_index = mad24(y0, srcstep2, mad24(x, (int)sizeof(srcT2_C1) * cn, srcoffset2));
#endif
int dst_index = mad24(y, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
int dst_index = mad24(y0, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
EXTRA_INDEX;
PROCESS_ELEM;
// Visit rows y0 .. min(rows, y0 + rowsPerWI) - 1; the min() clamp stands in for a
// per-row "y < rows" test. src1_index and dst_index advance by one row step per iteration.
for (int y = y0, y1 = min(rows, y0 + rowsPerWI); y < y1; ++y, src1_index += srcstep1, dst_index += dststep)
{
PROCESS_ELEM;
// src2_index only exists under the same condition as its declaration above,
// so it is advanced here rather than in the for-header.
#if !(defined(OP_RECIP_SCALE) || defined(OP_NOT))
src2_index += srcstep2;
#endif
// Per-row advance of whatever extra index EXTRA_INDEX declared (may expand to nothing).
EXTRA_INDEX_ADD;
}
}
}
@@ -417,19 +427,21 @@ __kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
int rows, int cols EXTRA_PARAMS )
{
// NOTE(review): this body comes from a commit diff shown without +/- markers, so lines
// indexed by y (presumably superseded) appear next to their y0-based replacements.
int x = get_global_id(0);
int y = get_global_id(1);
// First row handled by this work-item: the dimension-1 global id scaled by rowsPerWI.
int y0 = get_global_id(1) * rowsPerWI;
if (x < cols && y < rows)
if (x < cols)
{
int mask_index = mad24(y, maskstep, x + maskoffset);
if( mask[mask_index] )
{
int src1_index = mad24(y, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
int src2_index = mad24(y, srcstep2, mad24(x, (int)sizeof(srcT2_C1) * cn, srcoffset2));
int dst_index = mad24(y, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
// Starting indices for column x in row y0. The mask index uses x directly
// (no element-size or cn factor), unlike the source and destination indices.
int mask_index = mad24(y0, maskstep, x + maskoffset);
int src1_index = mad24(y0, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
int src2_index = mad24(y0, srcstep2, mad24(x, (int)sizeof(srcT2_C1) * cn, srcoffset2));
int dst_index = mad24(y0, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
PROCESS_ELEM;
}
// Visit rows y0 .. min(rows, y0 + rowsPerWI) - 1. All four indices advance by their own
// row step on every iteration, whether or not the mask test below passes.
for (int y = y0, y1 = min(rows, y0 + rowsPerWI); y < y1; ++y, src1_index += srcstep1, src2_index += srcstep2,
mask_index += maskstep, dst_index += dststep)
// Only elements whose mask entry is non-zero are processed.
if (mask[mask_index])
{
PROCESS_ELEM;
}
}
}
@@ -440,14 +452,17 @@ __kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
int rows, int cols EXTRA_PARAMS )
{
// NOTE(review): this body comes from a commit diff shown without +/- markers, so lines
// indexed by y (presumably superseded) appear next to their y0-based replacements.
int x = get_global_id(0);
int y = get_global_id(1);
// First row handled by this work-item: the dimension-1 global id scaled by rowsPerWI.
int y0 = get_global_id(1) * rowsPerWI;
if (x < cols && y < rows)
if (x < cols)
{
int src1_index = mad24(y, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
int dst_index = mad24(y, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
// Starting indices for column x in row y0:
// mad24(row, step, mad24(x, element size * cn, offset)).
int src1_index = mad24(y0, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
int dst_index = mad24(y0, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
PROCESS_ELEM;
// Visit rows y0 .. min(rows, y0 + rowsPerWI) - 1; the min() clamp stands in for a
// per-row "y < rows" test. Both indices advance by one row step per iteration.
for (int y = y0, y1 = min(rows, y0 + rowsPerWI); y < y1; ++y, src1_index += srcstep1, dst_index += dststep)
{
PROCESS_ELEM;
}
}
}
@@ -459,18 +474,19 @@ __kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
int rows, int cols EXTRA_PARAMS )
{
// NOTE(review): this body comes from a commit diff shown without +/- markers, so lines
// indexed by y (presumably superseded) appear next to their y0-based replacements.
int x = get_global_id(0);
int y = get_global_id(1);
// First row handled by this work-item. The global id must be scaled by rowsPerWI because
// the loop below covers rowsPerWI consecutive rows starting at y0; without the scaling,
// neighbouring work-items would process overlapping rows.
int y0 = get_global_id(1) * rowsPerWI;
if (x < cols && y < rows)
if (x < cols)
{
int mask_index = mad24(y, maskstep, x + maskoffset);
if( mask[mask_index] )
{
int src1_index = mad24(y, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
int dst_index = mad24(y, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
// Starting indices for column x in row y0. The mask index uses x directly
// (no element-size or cn factor), unlike the source and destination indices.
int mask_index = mad24(y0, maskstep, x + maskoffset);
int src1_index = mad24(y0, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
int dst_index = mad24(y0, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
PROCESS_ELEM;
}
// Visit rows y0 .. min(rows, y0 + rowsPerWI) - 1. All three indices advance by their own
// row step on every iteration, whether or not the mask test below passes.
for (int y = y0, y1 = min(rows, y0 + rowsPerWI); y < y1; ++y, src1_index += srcstep1, mask_index += maskstep, dst_index += dststep)
// Only elements whose mask entry is non-zero are processed.
if (mask[mask_index])
{
PROCESS_ELEM;
}
}
}