Merge pull request #1428 from ilya-lavrenov:ocl_pyr
This commit is contained in:
commit
ac9bc6423a
@ -43,37 +43,6 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
//#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||
|
||||
|
||||
uchar round_uchar_int(int v)
|
||||
{
|
||||
return (uchar)((uint)v <= 255 ? v : v > 0 ? 255 : 0);
|
||||
}
|
||||
|
||||
uchar round_uchar_float(float v)
|
||||
{
|
||||
return round_uchar_int(convert_int_sat_rte(v));
|
||||
}
|
||||
|
||||
uchar4 round_uchar4_int4(int4 v)
|
||||
{
|
||||
uchar4 result;
|
||||
result.x = (uchar)(v.x <= 255 ? v.x : v.x > 0 ? 255 : 0);
|
||||
result.y = (uchar)(v.y <= 255 ? v.y : v.y > 0 ? 255 : 0);
|
||||
result.z = (uchar)(v.z <= 255 ? v.z : v.z > 0 ? 255 : 0);
|
||||
result.w = (uchar)(v.w <= 255 ? v.w : v.w > 0 ? 255 : 0);
|
||||
return result;
|
||||
}
|
||||
|
||||
uchar4 round_uchar4_float4(float4 v)
|
||||
{
|
||||
return round_uchar4_int4(convert_int4_sat_rte(v));
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
int idx_row_low(int y, int last_row)
|
||||
{
|
||||
return abs(y) % (last_row + 1);
|
||||
@ -104,6 +73,10 @@ int idx_col(int x, int last_col)
|
||||
return idx_col_low(idx_col_high(x, last_col), last_col);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
////////////////////////// CV_8UC1 ///////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
__kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcRows, int srcCols, __global uchar *dst, int dstStep, int dstCols)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
@ -211,10 +184,14 @@ __kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcRows,
|
||||
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
|
||||
|
||||
if (dst_x < dstCols)
|
||||
dst[y * dstStep + dst_x] = round_uchar_float(sum);
|
||||
dst[y * dstStep + dst_x] = convert_uchar_sat_rte(sum);
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
////////////////////////// CV_8UC4 ///////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
__kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows, int srcCols, __global uchar4 *dst, int dstStep, int dstCols)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
@ -228,16 +205,16 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows,
|
||||
const int last_row = srcRows - 1;
|
||||
const int last_col = srcCols - 1;
|
||||
|
||||
float4 co1 = 0.375f;//(float4)(0.375f, 0.375f, 0.375f, 0.375f);
|
||||
float4 co2 = 0.25f;//(float4)(0.25f, 0.25f, 0.25f, 0.25f);
|
||||
float4 co3 = 0.0625f;//(float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f);
|
||||
float4 co1 = 0.375f;
|
||||
float4 co2 = 0.25f;
|
||||
float4 co3 = 0.0625f;
|
||||
|
||||
if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2)
|
||||
{
|
||||
sum = co3 * convert_float4((((srcData + (src_y - 2) * srcStep / 4))[x]));
|
||||
sum = sum + co2 * convert_float4((((srcData + (src_y - 1) * srcStep / 4))[x]));
|
||||
sum = sum + co1 * convert_float4((((srcData + (src_y ) * srcStep / 4))[x]));
|
||||
sum = sum + co2 * convert_float4((((srcData + (src_y + 1) * srcStep / 4))[x]));
|
||||
sum = sum + co2 * convert_float4((((srcData + (src_y - 1) * srcStep / 4))[x]));
|
||||
sum = sum + co1 * convert_float4((((srcData + (src_y ) * srcStep / 4))[x]));
|
||||
sum = sum + co2 * convert_float4((((srcData + (src_y + 1) * srcStep / 4))[x]));
|
||||
sum = sum + co3 * convert_float4((((srcData + (src_y + 2) * srcStep / 4))[x]));
|
||||
|
||||
smem[2 + get_local_id(0)] = sum;
|
||||
@ -247,9 +224,9 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows,
|
||||
const int left_x = x - 2;
|
||||
|
||||
sum = co3 * convert_float4((((srcData + (src_y - 2) * srcStep / 4))[left_x]));
|
||||
sum = sum + co2 * convert_float4((((srcData + (src_y - 1) * srcStep / 4))[left_x]));
|
||||
sum = sum + co1 * convert_float4((((srcData + (src_y ) * srcStep / 4))[left_x]));
|
||||
sum = sum + co2 * convert_float4((((srcData + (src_y + 1) * srcStep / 4))[left_x]));
|
||||
sum = sum + co2 * convert_float4((((srcData + (src_y - 1) * srcStep / 4))[left_x]));
|
||||
sum = sum + co1 * convert_float4((((srcData + (src_y ) * srcStep / 4))[left_x]));
|
||||
sum = sum + co2 * convert_float4((((srcData + (src_y + 1) * srcStep / 4))[left_x]));
|
||||
sum = sum + co3 * convert_float4((((srcData + (src_y + 2) * srcStep / 4))[left_x]));
|
||||
|
||||
smem[get_local_id(0)] = sum;
|
||||
@ -260,9 +237,9 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows,
|
||||
const int right_x = x + 2;
|
||||
|
||||
sum = co3 * convert_float4((((srcData + (src_y - 2) * srcStep / 4))[right_x]));
|
||||
sum = sum + co2 * convert_float4((((srcData + (src_y - 1) * srcStep / 4))[right_x]));
|
||||
sum = sum + co1 * convert_float4((((srcData + (src_y ) * srcStep / 4))[right_x]));
|
||||
sum = sum + co2 * convert_float4((((srcData + (src_y + 1) * srcStep / 4))[right_x]));
|
||||
sum = sum + co2 * convert_float4((((srcData + (src_y - 1) * srcStep / 4))[right_x]));
|
||||
sum = sum + co1 * convert_float4((((srcData + (src_y ) * srcStep / 4))[right_x]));
|
||||
sum = sum + co2 * convert_float4((((srcData + (src_y + 1) * srcStep / 4))[right_x]));
|
||||
sum = sum + co3 * convert_float4((((srcData + (src_y + 2) * srcStep / 4))[right_x]));
|
||||
|
||||
smem[4 + get_local_id(0)] = sum;
|
||||
@ -273,9 +250,9 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows,
|
||||
int col = idx_col(x, last_col);
|
||||
|
||||
sum = co3 * convert_float4((((srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]));
|
||||
sum = sum + co2 * convert_float4((((srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]));
|
||||
sum = sum + co1 * convert_float4((((srcData + idx_row(src_y , last_row) * srcStep / 4))[col]));
|
||||
sum = sum + co2 * convert_float4((((srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]));
|
||||
sum = sum + co2 * convert_float4((((srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]));
|
||||
sum = sum + co1 * convert_float4((((srcData + idx_row(src_y , last_row) * srcStep / 4))[col]));
|
||||
sum = sum + co2 * convert_float4((((srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]));
|
||||
sum = sum + co3 * convert_float4((((srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]));
|
||||
|
||||
smem[2 + get_local_id(0)] = sum;
|
||||
@ -287,9 +264,9 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows,
|
||||
col = idx_col(left_x, last_col);
|
||||
|
||||
sum = co3 * convert_float4((((srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]));
|
||||
sum = sum + co2 * convert_float4((((srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]));
|
||||
sum = sum + co1 * convert_float4((((srcData + idx_row(src_y , last_row) * srcStep / 4))[col]));
|
||||
sum = sum + co2 * convert_float4((((srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]));
|
||||
sum = sum + co2 * convert_float4((((srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]));
|
||||
sum = sum + co1 * convert_float4((((srcData + idx_row(src_y , last_row) * srcStep / 4))[col]));
|
||||
sum = sum + co2 * convert_float4((((srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]));
|
||||
sum = sum + co3 * convert_float4((((srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]));
|
||||
|
||||
smem[get_local_id(0)] = sum;
|
||||
@ -302,9 +279,9 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows,
|
||||
col = idx_col(right_x, last_col);
|
||||
|
||||
sum = co3 * convert_float4((((srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]));
|
||||
sum = sum + co2 * convert_float4((((srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]));
|
||||
sum = sum + co1 * convert_float4((((srcData + idx_row(src_y , last_row) * srcStep / 4))[col]));
|
||||
sum = sum + co2 * convert_float4((((srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]));
|
||||
sum = sum + co2 * convert_float4((((srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]));
|
||||
sum = sum + co1 * convert_float4((((srcData + idx_row(src_y , last_row) * srcStep / 4))[col]));
|
||||
sum = sum + co2 * convert_float4((((srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]));
|
||||
sum = sum + co3 * convert_float4((((srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]));
|
||||
|
||||
smem[4 + get_local_id(0)] = sum;
|
||||
@ -318,18 +295,490 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows,
|
||||
const int tid2 = get_local_id(0) * 2;
|
||||
|
||||
sum = co3 * smem[2 + tid2 - 2];
|
||||
sum = sum + co2 * smem[2 + tid2 - 1];
|
||||
sum = sum + co1 * smem[2 + tid2 ];
|
||||
sum = sum + co2 * smem[2 + tid2 + 1];
|
||||
sum = sum + co2 * smem[2 + tid2 - 1];
|
||||
sum = sum + co1 * smem[2 + tid2 ];
|
||||
sum = sum + co2 * smem[2 + tid2 + 1];
|
||||
sum = sum + co3 * smem[2 + tid2 + 2];
|
||||
|
||||
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
|
||||
|
||||
if (dst_x < dstCols)
|
||||
dst[y * dstStep / 4 + dst_x] = round_uchar4_float4(sum);
|
||||
dst[y * dstStep / 4 + dst_x] = convert_uchar4_sat_rte(sum);
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
////////////////////////// CV_16UC1 //////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
__kernel void pyrDown_C1_D2(__global ushort * srcData, int srcStep, int srcRows, int srcCols, __global ushort *dst, int dstStep, int dstCols)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_group_id(1);
|
||||
|
||||
__local float smem[256 + 4];
|
||||
|
||||
float sum;
|
||||
|
||||
const int src_y = 2*y;
|
||||
const int last_row = srcRows - 1;
|
||||
const int last_col = srcCols - 1;
|
||||
|
||||
if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2)
|
||||
{
|
||||
sum = 0.0625f * ((__global ushort*)((__global char*)srcData + (src_y - 2) * srcStep))[x];
|
||||
sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + (src_y - 1) * srcStep))[x];
|
||||
sum = sum + 0.375f * ((__global ushort*)((__global char*)srcData + (src_y ) * srcStep))[x];
|
||||
sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + (src_y + 1) * srcStep))[x];
|
||||
sum = sum + 0.0625f * ((__global ushort*)((__global char*)srcData + (src_y + 2) * srcStep))[x];
|
||||
|
||||
smem[2 + get_local_id(0)] = sum;
|
||||
|
||||
if (get_local_id(0) < 2)
|
||||
{
|
||||
const int left_x = x - 2;
|
||||
|
||||
sum = 0.0625f * ((__global ushort*)((__global char*)srcData + (src_y - 2) * srcStep))[left_x];
|
||||
sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + (src_y - 1) * srcStep))[left_x];
|
||||
sum = sum + 0.375f * ((__global ushort*)((__global char*)srcData + (src_y ) * srcStep))[left_x];
|
||||
sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + (src_y + 1) * srcStep))[left_x];
|
||||
sum = sum + 0.0625f * ((__global ushort*)((__global char*)srcData + (src_y + 2) * srcStep))[left_x];
|
||||
|
||||
smem[get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
if (get_local_id(0) > 253)
|
||||
{
|
||||
const int right_x = x + 2;
|
||||
|
||||
sum = 0.0625f * ((__global ushort*)((__global char*)srcData + (src_y - 2) * srcStep))[right_x];
|
||||
sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + (src_y - 1) * srcStep))[right_x];
|
||||
sum = sum + 0.375f * ((__global ushort*)((__global char*)srcData + (src_y ) * srcStep))[right_x];
|
||||
sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + (src_y + 1) * srcStep))[right_x];
|
||||
sum = sum + 0.0625f * ((__global ushort*)((__global char*)srcData + (src_y + 2) * srcStep))[right_x];
|
||||
|
||||
smem[4 + get_local_id(0)] = sum;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int col = idx_col(x, last_col);
|
||||
|
||||
sum = 0.0625f * ((__global ushort*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col];
|
||||
sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col];
|
||||
sum = sum + 0.375f * ((__global ushort*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[col];
|
||||
sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col];
|
||||
sum = sum + 0.0625f * ((__global ushort*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col];
|
||||
|
||||
smem[2 + get_local_id(0)] = sum;
|
||||
|
||||
if (get_local_id(0) < 2)
|
||||
{
|
||||
const int left_x = x - 2;
|
||||
|
||||
col = idx_col(left_x, last_col);
|
||||
|
||||
sum = 0.0625f * ((__global ushort*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col];
|
||||
sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col];
|
||||
sum = sum + 0.375f * ((__global ushort*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[col];
|
||||
sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col];
|
||||
sum = sum + 0.0625f * ((__global ushort*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col];
|
||||
|
||||
smem[get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
if (get_local_id(0) > 253)
|
||||
{
|
||||
const int right_x = x + 2;
|
||||
|
||||
col = idx_col(right_x, last_col);
|
||||
|
||||
sum = 0.0625f * ((__global ushort*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col];
|
||||
sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col];
|
||||
sum = sum + 0.375f * ((__global ushort*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[col];
|
||||
sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col];
|
||||
sum = sum + 0.0625f * ((__global ushort*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col];
|
||||
|
||||
smem[4 + get_local_id(0)] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (get_local_id(0) < 128)
|
||||
{
|
||||
const int tid2 = get_local_id(0) * 2;
|
||||
|
||||
sum = 0.0625f * smem[2 + tid2 - 2];
|
||||
sum = sum + 0.25f * smem[2 + tid2 - 1];
|
||||
sum = sum + 0.375f * smem[2 + tid2 ];
|
||||
sum = sum + 0.25f * smem[2 + tid2 + 1];
|
||||
sum = sum + 0.0625f * smem[2 + tid2 + 2];
|
||||
|
||||
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
|
||||
|
||||
if (dst_x < dstCols)
|
||||
dst[y * dstStep / 2 + dst_x] = convert_ushort_sat_rte(sum);
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
////////////////////////// CV_16UC4 //////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
__kernel void pyrDown_C4_D2(__global ushort4 * srcData, int srcStep, int srcRows, int srcCols, __global ushort4 *dst, int dstStep, int dstCols)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_group_id(1);
|
||||
|
||||
__local float4 smem[256 + 4];
|
||||
|
||||
float4 sum;
|
||||
|
||||
const int src_y = 2*y;
|
||||
const int last_row = srcRows - 1;
|
||||
const int last_col = srcCols - 1;
|
||||
|
||||
float4 co1 = 0.375f;
|
||||
float4 co2 = 0.25f;
|
||||
float4 co3 = 0.0625f;
|
||||
|
||||
if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2)
|
||||
{
|
||||
sum = co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[x]);
|
||||
sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[x]);
|
||||
sum = sum + co1 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[x]);
|
||||
sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[x]);
|
||||
sum = sum + co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[x]);
|
||||
|
||||
smem[2 + get_local_id(0)] = sum;
|
||||
|
||||
if (get_local_id(0) < 2)
|
||||
{
|
||||
const int left_x = x - 2;
|
||||
|
||||
sum = co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[left_x]);
|
||||
sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[left_x]);
|
||||
sum = sum + co1 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[left_x]);
|
||||
sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[left_x]);
|
||||
sum = sum + co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[left_x]);
|
||||
|
||||
smem[get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
if (get_local_id(0) > 253)
|
||||
{
|
||||
const int right_x = x + 2;
|
||||
|
||||
sum = co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[right_x]);
|
||||
sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[right_x]);
|
||||
sum = sum + co1 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[right_x]);
|
||||
sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[right_x]);
|
||||
sum = sum + co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[right_x]);
|
||||
|
||||
smem[4 + get_local_id(0)] = sum;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int col = idx_col(x, last_col);
|
||||
|
||||
sum = co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]);
|
||||
sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]);
|
||||
sum = sum + co1 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col]);
|
||||
sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]);
|
||||
sum = sum + co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]);
|
||||
|
||||
smem[2 + get_local_id(0)] = sum;
|
||||
|
||||
if (get_local_id(0) < 2)
|
||||
{
|
||||
const int left_x = x - 2;
|
||||
|
||||
col = idx_col(left_x, last_col);
|
||||
|
||||
sum = co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]);
|
||||
sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]);
|
||||
sum = sum + co1 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col]);
|
||||
sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]);
|
||||
sum = sum + co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]);
|
||||
|
||||
smem[get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
if (get_local_id(0) > 253)
|
||||
{
|
||||
const int right_x = x + 2;
|
||||
|
||||
col = idx_col(right_x, last_col);
|
||||
|
||||
sum = co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]);
|
||||
sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]);
|
||||
sum = sum + co1 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col]);
|
||||
sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]);
|
||||
sum = sum + co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]);
|
||||
|
||||
smem[4 + get_local_id(0)] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (get_local_id(0) < 128)
|
||||
{
|
||||
const int tid2 = get_local_id(0) * 2;
|
||||
|
||||
sum = co3 * smem[2 + tid2 - 2];
|
||||
sum = sum + co2 * smem[2 + tid2 - 1];
|
||||
sum = sum + co1 * smem[2 + tid2 ];
|
||||
sum = sum + co2 * smem[2 + tid2 + 1];
|
||||
sum = sum + co3 * smem[2 + tid2 + 2];
|
||||
|
||||
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
|
||||
|
||||
if (dst_x < dstCols)
|
||||
dst[y * dstStep / 8 + dst_x] = convert_ushort4_sat_rte(sum);
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
////////////////////////// CV_16SC1 //////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
__kernel void pyrDown_C1_D3(__global short * srcData, int srcStep, int srcRows, int srcCols, __global short *dst, int dstStep, int dstCols)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_group_id(1);
|
||||
|
||||
__local float smem[256 + 4];
|
||||
|
||||
float sum;
|
||||
|
||||
const int src_y = 2*y;
|
||||
const int last_row = srcRows - 1;
|
||||
const int last_col = srcCols - 1;
|
||||
|
||||
if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2)
|
||||
{
|
||||
sum = 0.0625f * ((__global short*)((__global char*)srcData + (src_y - 2) * srcStep))[x];
|
||||
sum = sum + 0.25f * ((__global short*)((__global char*)srcData + (src_y - 1) * srcStep))[x];
|
||||
sum = sum + 0.375f * ((__global short*)((__global char*)srcData + (src_y ) * srcStep))[x];
|
||||
sum = sum + 0.25f * ((__global short*)((__global char*)srcData + (src_y + 1) * srcStep))[x];
|
||||
sum = sum + 0.0625f * ((__global short*)((__global char*)srcData + (src_y + 2) * srcStep))[x];
|
||||
|
||||
smem[2 + get_local_id(0)] = sum;
|
||||
|
||||
if (get_local_id(0) < 2)
|
||||
{
|
||||
const int left_x = x - 2;
|
||||
|
||||
sum = 0.0625f * ((__global short*)((__global char*)srcData + (src_y - 2) * srcStep))[left_x];
|
||||
sum = sum + 0.25f * ((__global short*)((__global char*)srcData + (src_y - 1) * srcStep))[left_x];
|
||||
sum = sum + 0.375f * ((__global short*)((__global char*)srcData + (src_y ) * srcStep))[left_x];
|
||||
sum = sum + 0.25f * ((__global short*)((__global char*)srcData + (src_y + 1) * srcStep))[left_x];
|
||||
sum = sum + 0.0625f * ((__global short*)((__global char*)srcData + (src_y + 2) * srcStep))[left_x];
|
||||
|
||||
smem[get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
if (get_local_id(0) > 253)
|
||||
{
|
||||
const int right_x = x + 2;
|
||||
|
||||
sum = 0.0625f * ((__global short*)((__global char*)srcData + (src_y - 2) * srcStep))[right_x];
|
||||
sum = sum + 0.25f * ((__global short*)((__global char*)srcData + (src_y - 1) * srcStep))[right_x];
|
||||
sum = sum + 0.375f * ((__global short*)((__global char*)srcData + (src_y ) * srcStep))[right_x];
|
||||
sum = sum + 0.25f * ((__global short*)((__global char*)srcData + (src_y + 1) * srcStep))[right_x];
|
||||
sum = sum + 0.0625f * ((__global short*)((__global char*)srcData + (src_y + 2) * srcStep))[right_x];
|
||||
|
||||
smem[4 + get_local_id(0)] = sum;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int col = idx_col(x, last_col);
|
||||
|
||||
sum = 0.0625f * ((__global short*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col];
|
||||
sum = sum + 0.25f * ((__global short*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col];
|
||||
sum = sum + 0.375f * ((__global short*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[col];
|
||||
sum = sum + 0.25f * ((__global short*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col];
|
||||
sum = sum + 0.0625f * ((__global short*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col];
|
||||
|
||||
smem[2 + get_local_id(0)] = sum;
|
||||
|
||||
if (get_local_id(0) < 2)
|
||||
{
|
||||
const int left_x = x - 2;
|
||||
|
||||
col = idx_col(left_x, last_col);
|
||||
|
||||
sum = 0.0625f * ((__global short*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col];
|
||||
sum = sum + 0.25f * ((__global short*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col];
|
||||
sum = sum + 0.375f * ((__global short*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[col];
|
||||
sum = sum + 0.25f * ((__global short*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col];
|
||||
sum = sum + 0.0625f * ((__global short*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col];
|
||||
|
||||
smem[get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
if (get_local_id(0) > 253)
|
||||
{
|
||||
const int right_x = x + 2;
|
||||
|
||||
col = idx_col(right_x, last_col);
|
||||
|
||||
sum = 0.0625f * ((__global short*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col];
|
||||
sum = sum + 0.25f * ((__global short*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col];
|
||||
sum = sum + 0.375f * ((__global short*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[col];
|
||||
sum = sum + 0.25f * ((__global short*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col];
|
||||
sum = sum + 0.0625f * ((__global short*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col];
|
||||
|
||||
smem[4 + get_local_id(0)] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (get_local_id(0) < 128)
|
||||
{
|
||||
const int tid2 = get_local_id(0) * 2;
|
||||
|
||||
sum = 0.0625f * smem[2 + tid2 - 2];
|
||||
sum = sum + 0.25f * smem[2 + tid2 - 1];
|
||||
sum = sum + 0.375f * smem[2 + tid2 ];
|
||||
sum = sum + 0.25f * smem[2 + tid2 + 1];
|
||||
sum = sum + 0.0625f * smem[2 + tid2 + 2];
|
||||
|
||||
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
|
||||
|
||||
if (dst_x < dstCols)
|
||||
dst[y * dstStep / 2 + dst_x] = convert_short_sat_rte(sum);
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
////////////////////////// CV_16SC4 //////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
__kernel void pyrDown_C4_D3(__global short4 * srcData, int srcStep, int srcRows, int srcCols, __global short4 *dst, int dstStep, int dstCols)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_group_id(1);
|
||||
|
||||
__local float4 smem[256 + 4];
|
||||
|
||||
float4 sum;
|
||||
|
||||
const int src_y = 2*y;
|
||||
const int last_row = srcRows - 1;
|
||||
const int last_col = srcCols - 1;
|
||||
|
||||
float4 co1 = 0.375f;
|
||||
float4 co2 = 0.25f;
|
||||
float4 co3 = 0.0625f;
|
||||
|
||||
if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2)
|
||||
{
|
||||
sum = co3 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[x]);
|
||||
sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[x]);
|
||||
sum = sum + co1 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[x]);
|
||||
sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[x]);
|
||||
sum = sum + co3 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[x]);
|
||||
|
||||
smem[2 + get_local_id(0)] = sum;
|
||||
|
||||
if (get_local_id(0) < 2)
|
||||
{
|
||||
const int left_x = x - 2;
|
||||
|
||||
sum = co3 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[left_x]);
|
||||
sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[left_x]);
|
||||
sum = sum + co1 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[left_x]);
|
||||
sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[left_x]);
|
||||
sum = sum + co3 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[left_x]);
|
||||
|
||||
smem[get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
if (get_local_id(0) > 253)
|
||||
{
|
||||
const int right_x = x + 2;
|
||||
|
||||
sum = co3 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[right_x]);
|
||||
sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[right_x]);
|
||||
sum = sum + co1 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[right_x]);
|
||||
sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[right_x]);
|
||||
sum = sum + co3 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[right_x]);
|
||||
|
||||
smem[4 + get_local_id(0)] = sum;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int col = idx_col(x, last_col);
|
||||
|
||||
sum = co3 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]);
|
||||
sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]);
|
||||
sum = sum + co1 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col]);
|
||||
sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]);
|
||||
sum = sum + co3 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]);
|
||||
|
||||
smem[2 + get_local_id(0)] = sum;
|
||||
|
||||
if (get_local_id(0) < 2)
|
||||
{
|
||||
const int left_x = x - 2;
|
||||
|
||||
col = idx_col(left_x, last_col);
|
||||
|
||||
sum = co3 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]);
|
||||
sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]);
|
||||
sum = sum + co1 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col]);
|
||||
sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]);
|
||||
sum = sum + co3 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]);
|
||||
|
||||
smem[get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
if (get_local_id(0) > 253)
|
||||
{
|
||||
const int right_x = x + 2;
|
||||
|
||||
col = idx_col(right_x, last_col);
|
||||
|
||||
sum = co3 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]);
|
||||
sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]);
|
||||
sum = sum + co1 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col]);
|
||||
sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]);
|
||||
sum = sum + co3 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]);
|
||||
|
||||
smem[4 + get_local_id(0)] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (get_local_id(0) < 128)
|
||||
{
|
||||
const int tid2 = get_local_id(0) * 2;
|
||||
|
||||
sum = co3 * smem[2 + tid2 - 2];
|
||||
sum = sum + co2 * smem[2 + tid2 - 1];
|
||||
sum = sum + co1 * smem[2 + tid2 ];
|
||||
sum = sum + co2 * smem[2 + tid2 + 1];
|
||||
sum = sum + co3 * smem[2 + tid2 + 2];
|
||||
|
||||
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
|
||||
|
||||
if (dst_x < dstCols)
|
||||
dst[y * dstStep / 8 + dst_x] = convert_short4_sat_rte(sum);
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
////////////////////////// CV_32FC1 //////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
__kernel void pyrDown_C1_D5(__global float * srcData, int srcStep, int srcRows, int srcCols, __global float *dst, int dstStep, int dstCols)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
@ -441,6 +890,10 @@ __kernel void pyrDown_C1_D5(__global float * srcData, int srcStep, int srcRows,
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
////////////////////////// CV_32FC4 //////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
__kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcRows, int srcCols, __global float4 *dst, int dstStep, int dstCols)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
@ -454,16 +907,16 @@ __kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcRows,
|
||||
const int last_row = srcRows - 1;
|
||||
const int last_col = srcCols - 1;
|
||||
|
||||
float4 co1 = 0.375f;//(float4)(0.375f, 0.375f, 0.375f, 0.375f);
|
||||
float4 co2 = 0.25f;//(float4)(0.25f, 0.25f, 0.25f, 0.25f);
|
||||
float4 co3 = 0.0625f;//(float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f);
|
||||
float4 co1 = 0.375f;
|
||||
float4 co2 = 0.25f;
|
||||
float4 co3 = 0.0625f;
|
||||
|
||||
if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2)
|
||||
{
|
||||
sum = co3 * ((__global float4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[x];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[x];
|
||||
sum = sum + co1 * ((__global float4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[x];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[x];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[x];
|
||||
sum = sum + co1 * ((__global float4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[x];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[x];
|
||||
sum = sum + co3 * ((__global float4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[x];
|
||||
|
||||
smem[2 + get_local_id(0)] = sum;
|
||||
@ -473,9 +926,9 @@ __kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcRows,
|
||||
const int left_x = x - 2;
|
||||
|
||||
sum = co3 * ((__global float4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[left_x];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[left_x];
|
||||
sum = sum + co1 * ((__global float4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[left_x];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[left_x];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[left_x];
|
||||
sum = sum + co1 * ((__global float4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[left_x];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[left_x];
|
||||
sum = sum + co3 * ((__global float4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[left_x];
|
||||
|
||||
smem[get_local_id(0)] = sum;
|
||||
@ -486,9 +939,9 @@ __kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcRows,
|
||||
const int right_x = x + 2;
|
||||
|
||||
sum = co3 * ((__global float4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[right_x];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[right_x];
|
||||
sum = sum + co1 * ((__global float4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[right_x];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[right_x];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[right_x];
|
||||
sum = sum + co1 * ((__global float4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[right_x];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[right_x];
|
||||
sum = sum + co3 * ((__global float4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[right_x];
|
||||
|
||||
smem[4 + get_local_id(0)] = sum;
|
||||
@ -499,9 +952,9 @@ __kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcRows,
|
||||
int col = idx_col(x, last_col);
|
||||
|
||||
sum = co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col];
|
||||
sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col];
|
||||
sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col];
|
||||
sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col];
|
||||
|
||||
smem[2 + get_local_id(0)] = sum;
|
||||
@ -513,9 +966,9 @@ __kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcRows,
|
||||
col = idx_col(left_x, last_col);
|
||||
|
||||
sum = co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col];
|
||||
sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col];
|
||||
sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col];
|
||||
sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col];
|
||||
|
||||
smem[get_local_id(0)] = sum;
|
||||
@ -528,9 +981,9 @@ __kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcRows,
|
||||
col = idx_col(right_x, last_col);
|
||||
|
||||
sum = co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col];
|
||||
sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col];
|
||||
sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col];
|
||||
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col];
|
||||
sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col];
|
||||
|
||||
smem[4 + get_local_id(0)] = sum;
|
||||
@ -544,9 +997,9 @@ __kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcRows,
|
||||
const int tid2 = get_local_id(0) * 2;
|
||||
|
||||
sum = co3 * smem[2 + tid2 - 2];
|
||||
sum = sum + co2 * smem[2 + tid2 - 1];
|
||||
sum = sum + co1 * smem[2 + tid2 ];
|
||||
sum = sum + co2 * smem[2 + tid2 + 1];
|
||||
sum = sum + co2 * smem[2 + tid2 - 1];
|
||||
sum = sum + co1 * smem[2 + tid2 ];
|
||||
sum = sum + co2 * smem[2 + tid2 + 1];
|
||||
sum = sum + co3 * smem[2 + tid2 + 2];
|
||||
|
||||
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
|
||||
|
@ -46,18 +46,18 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
//#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||
|
||||
uchar get_valid_uchar(float data)
|
||||
{
|
||||
return (uchar)(data <= 255 ? data : data > 0 ? 255 : 0);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
////////////////////////// CV_8UC1 //////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
__kernel void pyrUp_C1_D0(__global uchar* src,__global uchar* dst,
|
||||
int srcRows,int dstRows,int srcCols,int dstCols,
|
||||
int srcOffset,int dstOffset,int srcStep,int dstStep)
|
||||
|
||||
__kernel void pyrUp_C1_D0(__global uchar* src, __global uchar* dst,
|
||||
int srcRows, int dstRows, int srcCols, int dstCols,
|
||||
int srcOffset, int dstOffset, int srcStep, int dstStep)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
@ -144,15 +144,15 @@ __kernel void pyrUp_C1_D0(__global uchar* src,__global uchar* dst,
|
||||
|
||||
if ((x < dstCols) && (y < dstRows))
|
||||
dst[x + y * dstStep] = convert_uchar_sat_rte(4.0f * sum);
|
||||
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
////////////////////////// CV_16UC1 /////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
__kernel void pyrUp_C1_D2(__global ushort* src,__global ushort* dst,
|
||||
int srcRows,int dstRows,int srcCols,int dstCols,
|
||||
int srcOffset,int dstOffset,int srcStep,int dstStep)
|
||||
|
||||
__kernel void pyrUp_C1_D2(__global ushort* src, __global ushort* dst,
|
||||
int srcRows, int dstRows, int srcCols, int dstCols,
|
||||
int srcOffset, int dstOffset, int srcStep, int dstStep)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
@ -245,16 +245,116 @@ __kernel void pyrUp_C1_D2(__global ushort* src,__global ushort* dst,
|
||||
sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)];
|
||||
|
||||
if ((x < dstCols) && (y < dstRows))
|
||||
dst[x + y * dstStep] = convert_short_sat_rte(4.0f * sum);
|
||||
dst[x + y * dstStep] = convert_ushort_sat_rte(4.0f * sum);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
////////////////////////// CV_16SC1 /////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
__kernel void pyrUp_C1_D3(__global short* src, __global short* dst,
|
||||
int srcRows, int dstRows, int srcCols, int dstCols,
|
||||
int srcOffset, int dstOffset, int srcStep, int dstStep)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
|
||||
__local float s_srcPatch[10][10];
|
||||
__local float s_dstPatch[20][16];
|
||||
|
||||
srcStep = srcStep >> 1;
|
||||
dstStep = dstStep >> 1;
|
||||
srcOffset = srcOffset >> 1;
|
||||
dstOffset = dstOffset >> 1;
|
||||
|
||||
if( get_local_id(0) < 10 && get_local_id(1) < 10 )
|
||||
{
|
||||
int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1;
|
||||
int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1;
|
||||
|
||||
srcx = abs(srcx);
|
||||
srcx = min(srcCols - 1,srcx);
|
||||
|
||||
srcy = abs(srcy);
|
||||
srcy = min(srcRows -1 ,srcy);
|
||||
|
||||
s_srcPatch[get_local_id(1)][get_local_id(0)] = (float)(src[srcx + srcy * srcStep]);
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
float sum = 0;
|
||||
|
||||
const int evenFlag = (int)((get_local_id(0) & 1) == 0);
|
||||
const int oddFlag = (int)((get_local_id(0) & 1) != 0);
|
||||
const bool eveny = ((get_local_id(1) & 1) == 0);
|
||||
const int tidx = get_local_id(0);
|
||||
|
||||
if (eveny)
|
||||
{
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)];
|
||||
}
|
||||
|
||||
s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum;
|
||||
|
||||
if (get_local_id(1) < 2)
|
||||
{
|
||||
sum = 0;
|
||||
|
||||
if (eveny)
|
||||
{
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx ) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
|
||||
}
|
||||
|
||||
s_dstPatch[get_local_id(1)][get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
if (get_local_id(1) > 13)
|
||||
{
|
||||
sum = 0;
|
||||
|
||||
if (eveny)
|
||||
{
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx ) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)];
|
||||
}
|
||||
s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
sum = 0;
|
||||
|
||||
const int tidy = get_local_id(1);
|
||||
|
||||
sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][get_local_id(0)];
|
||||
sum = sum + 0.25f * s_dstPatch[2 + tidy - 1][get_local_id(0)];
|
||||
sum = sum + 0.375f * s_dstPatch[2 + tidy ][get_local_id(0)];
|
||||
sum = sum + 0.25f * s_dstPatch[2 + tidy + 1][get_local_id(0)];
|
||||
sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)];
|
||||
|
||||
if ((x < dstCols) && (y < dstRows))
|
||||
dst[x + y * dstStep] = convert_short_sat_rte(4.0f * sum);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
////////////////////////// CV_32FC1 /////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
__kernel void pyrUp_C1_D5(__global float* src,__global float* dst,
|
||||
int srcRows,int dstRows,int srcCols,int dstCols,
|
||||
int srcOffset,int dstOffset,int srcStep,int dstStep)
|
||||
|
||||
__kernel void pyrUp_C1_D5(__global float* src, __global float* dst,
|
||||
int srcRows, int dstRows, int srcCols, int dstCols,
|
||||
int srcOffset, int dstOffset, int srcStep, int dstStep)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
@ -346,15 +446,15 @@ __kernel void pyrUp_C1_D5(__global float* src,__global float* dst,
|
||||
|
||||
if ((x < dstCols) && (y < dstRows))
|
||||
dst[x + y * dstStep] = (float)(4.0f * sum);
|
||||
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
////////////////////////// CV_8UC4 //////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
__kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst,
|
||||
int srcRows,int dstRows,int srcCols,int dstCols,
|
||||
int srcOffset,int dstOffset,int srcStep,int dstStep)
|
||||
|
||||
__kernel void pyrUp_C4_D0(__global uchar4* src, __global uchar4* dst,
|
||||
int srcRows, int dstRows, int srcCols, int dstCols,
|
||||
int srcOffset, int dstOffset, int srcStep, int dstStep)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
@ -451,17 +551,16 @@ __kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst,
|
||||
sum = sum + co3 * s_dstPatch[2 + tidy + 2][tidx];
|
||||
|
||||
if ((x < dstCols) && (y < dstRows))
|
||||
{
|
||||
dst[x + y * dstStep] = convert_uchar4_sat_rte(4.0f * sum);
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
////////////////////////// CV_16UC4 //////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
__kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst,
|
||||
int srcRows,int dstRows,int srcCols,int dstCols,
|
||||
int srcOffset,int dstOffset,int srcStep,int dstStep)
|
||||
|
||||
__kernel void pyrUp_C4_D2(__global ushort4* src, __global ushort4* dst,
|
||||
int srcRows, int dstRows, int srcCols, int dstCols,
|
||||
int srcOffset, int dstOffset, int srcStep, int dstStep)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
@ -560,17 +659,123 @@ __kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst,
|
||||
sum = sum + co3 * s_dstPatch[2 + tidy + 2][get_local_id(0)];
|
||||
|
||||
if ((x < dstCols) && (y < dstRows))
|
||||
{
|
||||
dst[x + y * dstStep] = convert_ushort4_sat_rte(4.0f * sum);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
////////////////////////// CV_16SC4 //////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
__kernel void pyrUp_C4_D3(__global short4* src, __global short4* dst,
|
||||
int srcRows, int dstRows, int srcCols, int dstCols,
|
||||
int srcOffset, int dstOffset, int srcStep, int dstStep)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
|
||||
__local float4 s_srcPatch[10][10];
|
||||
__local float4 s_dstPatch[20][16];
|
||||
|
||||
srcOffset >>= 3;
|
||||
dstOffset >>= 3;
|
||||
srcStep >>= 3;
|
||||
dstStep >>= 3;
|
||||
|
||||
if( get_local_id(0) < 10 && get_local_id(1) < 10 )
|
||||
{
|
||||
int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1;
|
||||
int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1;
|
||||
|
||||
srcx = abs(srcx);
|
||||
srcx = min(srcCols - 1,srcx);
|
||||
|
||||
srcy = abs(srcy);
|
||||
srcy = min(srcRows -1 ,srcy);
|
||||
|
||||
s_srcPatch[get_local_id(1)][get_local_id(0)] = convert_float4(src[srcx + srcy * srcStep]);
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
float4 sum = (float4)(0,0,0,0);
|
||||
|
||||
const float4 evenFlag = (float4)((get_local_id(0) & 1) == 0);
|
||||
const float4 oddFlag = (float4)((get_local_id(0) & 1) != 0);
|
||||
const bool eveny = ((get_local_id(1) & 1) == 0);
|
||||
const int tidx = get_local_id(0);
|
||||
|
||||
float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f);
|
||||
float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f);
|
||||
float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f);
|
||||
|
||||
|
||||
if(eveny)
|
||||
{
|
||||
sum = sum + ( evenFlag* co3 ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + ( oddFlag * co2 ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + ( evenFlag* co1 ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)];
|
||||
sum = sum + ( oddFlag * co2 ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + ( evenFlag* co3 ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)];
|
||||
|
||||
}
|
||||
|
||||
s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum;
|
||||
|
||||
if (get_local_id(1) < 2)
|
||||
{
|
||||
sum = 0;
|
||||
|
||||
if (eveny)
|
||||
{
|
||||
sum = sum + (evenFlag * co3 ) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + (oddFlag * co2 ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + (evenFlag * co1 ) * s_srcPatch[0][1 + ((tidx ) >> 1)];
|
||||
sum = sum + (oddFlag * co2 ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + (evenFlag * co3 ) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
|
||||
}
|
||||
|
||||
s_dstPatch[get_local_id(1)][get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
if (get_local_id(1) > 13)
|
||||
{
|
||||
sum = 0;
|
||||
|
||||
if (eveny)
|
||||
{
|
||||
sum = sum + (evenFlag * co3) * s_srcPatch[9][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + ( oddFlag * co2) * s_srcPatch[9][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + (evenFlag * co1) * s_srcPatch[9][1 + ((tidx ) >> 1)];
|
||||
sum = sum + ( oddFlag * co2) * s_srcPatch[9][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + (evenFlag * co3) * s_srcPatch[9][1 + ((tidx + 2) >> 1)];
|
||||
|
||||
}
|
||||
s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
sum = 0;
|
||||
|
||||
const int tidy = get_local_id(1);
|
||||
|
||||
sum = sum + co3 * s_dstPatch[2 + tidy - 2][get_local_id(0)];
|
||||
sum = sum + co2 * s_dstPatch[2 + tidy - 1][get_local_id(0)];
|
||||
sum = sum + co1 * s_dstPatch[2 + tidy ][get_local_id(0)];
|
||||
sum = sum + co2 * s_dstPatch[2 + tidy + 1][get_local_id(0)];
|
||||
sum = sum + co3 * s_dstPatch[2 + tidy + 2][get_local_id(0)];
|
||||
|
||||
if ((x < dstCols) && (y < dstRows))
|
||||
dst[x + y * dstStep] = convert_short4_sat_rte(4.0f * sum);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
////////////////////////// CV_32FC4 //////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
__kernel void pyrUp_C4_D5(__global float4* src,__global float4* dst,
|
||||
int srcRows,int dstRows,int srcCols,int dstCols,
|
||||
int srcOffset,int dstOffset,int srcStep,int dstStep)
|
||||
|
||||
__kernel void pyrUp_C4_D5(__global float4* src, __global float4* dst,
|
||||
int srcRows, int dstRows, int srcCols, int dstCols,
|
||||
int srcOffset, int dstOffset, int srcStep, int dstStep)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
@ -667,7 +872,5 @@ __kernel void pyrUp_C4_D5(__global float4* src,__global float4* dst,
|
||||
sum = sum + co3 * s_dstPatch[2 + tidy + 2][tidx];
|
||||
|
||||
if ((x < dstCols) && (y < dstRows))
|
||||
{
|
||||
dst[x + y * dstStep] = 4.0f * sum;
|
||||
}
|
||||
}
|
||||
|
@ -73,24 +73,11 @@ static void pyrdown_run(const oclMat &src, const oclMat &dst)
|
||||
CV_Assert(src.depth() != CV_8S);
|
||||
|
||||
Context *clCxt = src.clCxt;
|
||||
//int channels = dst.channels();
|
||||
//int depth = dst.depth();
|
||||
|
||||
string kernelName = "pyrDown";
|
||||
|
||||
//int vector_lengths[4][7] = {{4, 0, 4, 4, 1, 1, 1},
|
||||
// {4, 0, 4, 4, 1, 1, 1},
|
||||
// {4, 0, 4, 4, 1, 1, 1},
|
||||
// {4, 0, 4, 4, 1, 1, 1}
|
||||
//};
|
||||
|
||||
//size_t vector_length = vector_lengths[channels-1][depth];
|
||||
//int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1);
|
||||
|
||||
size_t localThreads[3] = { 256, 1, 1 };
|
||||
size_t globalThreads[3] = { src.cols, dst.rows, 1};
|
||||
|
||||
//int dst_step1 = dst.cols * dst.elemSize();
|
||||
vector<pair<size_t , const void *> > args;
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step ));
|
||||
@ -107,7 +94,9 @@ static void pyrdown_run(const oclMat &src, const oclMat &dst)
|
||||
|
||||
void cv::ocl::pyrDown(const oclMat &src, oclMat &dst)
|
||||
{
|
||||
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
|
||||
int depth = src.depth(), channels = src.channels();
|
||||
CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_16S || depth == CV_32F);
|
||||
CV_Assert(channels == 1 || channels == 3 || channels == 4);
|
||||
|
||||
dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());
|
||||
|
||||
|
@ -61,6 +61,11 @@ namespace cv
|
||||
extern const char *pyr_up;
|
||||
void pyrUp(const cv::ocl::oclMat &src, cv::ocl::oclMat &dst)
|
||||
{
|
||||
int depth = src.depth(), channels = src.channels();
|
||||
|
||||
CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_16S || depth == CV_32F);
|
||||
CV_Assert(channels == 1 || channels == 3 || channels == 4);
|
||||
|
||||
dst.create(src.rows * 2, src.cols * 2, src.type());
|
||||
|
||||
Context *clCxt = src.clCxt;
|
||||
|
@ -57,60 +57,63 @@ using namespace std;
|
||||
|
||||
PARAM_TEST_CASE(PyrBase, MatType, int)
|
||||
{
|
||||
int type;
|
||||
int depth;
|
||||
int channels;
|
||||
|
||||
Mat dst_cpu;
|
||||
oclMat gdst;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
type = GET_PARAM(0);
|
||||
depth = GET_PARAM(0);
|
||||
channels = GET_PARAM(1);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/////////////////////// PyrDown //////////////////////////
|
||||
struct PyrDown : PyrBase {};
|
||||
|
||||
typedef PyrBase PyrDown;
|
||||
|
||||
TEST_P(PyrDown, Mat)
|
||||
{
|
||||
for(int j = 0; j < LOOP_TIMES; j++)
|
||||
for (int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
Size size(MWIDTH, MHEIGHT);
|
||||
Mat src = randomMat(size, CV_MAKETYPE(type, channels));
|
||||
Mat src = randomMat(size, CV_MAKETYPE(depth, channels));
|
||||
oclMat gsrc(src);
|
||||
|
||||
pyrDown(src, dst_cpu);
|
||||
pyrDown(gsrc, gdst);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), type == CV_32F ? 1e-4f : 1.0f);
|
||||
EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), depth == CV_32F ? 1e-4f : 1.0f);
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrDown, Combine(
|
||||
Values(CV_8U, CV_32F), Values(1, 3, 4)));
|
||||
Values(CV_8U, CV_16U, CV_16S, CV_32F),
|
||||
Values(1, 3, 4)));
|
||||
|
||||
/////////////////////// PyrUp //////////////////////////
|
||||
|
||||
struct PyrUp : PyrBase {};
|
||||
typedef PyrBase PyrUp;
|
||||
|
||||
TEST_P(PyrUp, Accuracy)
|
||||
{
|
||||
for(int j = 0; j < LOOP_TIMES; j++)
|
||||
for (int j = 0; j < LOOP_TIMES; j++)
|
||||
{
|
||||
Size size(MWIDTH, MHEIGHT);
|
||||
Mat src = randomMat(size, CV_MAKETYPE(type, channels));
|
||||
Mat src = randomMat(size, CV_MAKETYPE(depth, channels));
|
||||
oclMat gsrc(src);
|
||||
|
||||
pyrUp(src, dst_cpu);
|
||||
pyrUp(gsrc, gdst);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), (type == CV_32F ? 1e-4f : 1.0));
|
||||
EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), (depth == CV_32F ? 1e-4f : 1.0));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrUp, testing::Combine(
|
||||
Values(CV_8U, CV_32F), Values(1, 3, 4)));
|
||||
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrUp, Combine(
|
||||
Values(CV_8U, CV_16U, CV_16S, CV_32F),
|
||||
Values(1, 3, 4)));
|
||||
#endif // HAVE_OPENCL
|
||||
|
Loading…
x
Reference in New Issue
Block a user