added CV_16UC(1, 3, 4), CV_16SC(1, 3, 4) data types support in ocl::pyrUp and ocl::pyrDown

This commit is contained in:
Ilya Lavrenov 2013-09-10 11:53:34 +04:00
parent 747f7178ca
commit 6b9c451938
5 changed files with 791 additions and 138 deletions

View File

@ -43,37 +43,6 @@
// //
//M*/ //M*/
//#pragma OPENCL EXTENSION cl_amd_printf : enable
uchar round_uchar_int(int v)
{
return (uchar)((uint)v <= 255 ? v : v > 0 ? 255 : 0);
}
uchar round_uchar_float(float v)
{
return round_uchar_int(convert_int_sat_rte(v));
}
uchar4 round_uchar4_int4(int4 v)
{
uchar4 result;
result.x = (uchar)(v.x <= 255 ? v.x : v.x > 0 ? 255 : 0);
result.y = (uchar)(v.y <= 255 ? v.y : v.y > 0 ? 255 : 0);
result.z = (uchar)(v.z <= 255 ? v.z : v.z > 0 ? 255 : 0);
result.w = (uchar)(v.w <= 255 ? v.w : v.w > 0 ? 255 : 0);
return result;
}
uchar4 round_uchar4_float4(float4 v)
{
return round_uchar4_int4(convert_int4_sat_rte(v));
}
int idx_row_low(int y, int last_row) int idx_row_low(int y, int last_row)
{ {
return abs(y) % (last_row + 1); return abs(y) % (last_row + 1);
@ -104,6 +73,10 @@ int idx_col(int x, int last_col)
return idx_col_low(idx_col_high(x, last_col), last_col); return idx_col_low(idx_col_high(x, last_col), last_col);
} }
///////////////////////////////////////////////////////////////////////
////////////////////////// CV_8UC1 ///////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcRows, int srcCols, __global uchar *dst, int dstStep, int dstCols) __kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcRows, int srcCols, __global uchar *dst, int dstStep, int dstCols)
{ {
const int x = get_global_id(0); const int x = get_global_id(0);
@ -211,10 +184,14 @@ __kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcRows,
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2; const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
if (dst_x < dstCols) if (dst_x < dstCols)
dst[y * dstStep + dst_x] = round_uchar_float(sum); dst[y * dstStep + dst_x] = convert_uchar_sat_rte(sum);
} }
} }
///////////////////////////////////////////////////////////////////////
////////////////////////// CV_8UC4 ///////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows, int srcCols, __global uchar4 *dst, int dstStep, int dstCols) __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows, int srcCols, __global uchar4 *dst, int dstStep, int dstCols)
{ {
const int x = get_global_id(0); const int x = get_global_id(0);
@ -228,9 +205,9 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows,
const int last_row = srcRows - 1; const int last_row = srcRows - 1;
const int last_col = srcCols - 1; const int last_col = srcCols - 1;
float4 co1 = 0.375f;//(float4)(0.375f, 0.375f, 0.375f, 0.375f); float4 co1 = 0.375f;
float4 co2 = 0.25f;//(float4)(0.25f, 0.25f, 0.25f, 0.25f); float4 co2 = 0.25f;
float4 co3 = 0.0625f;//(float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f); float4 co3 = 0.0625f;
if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2) if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2)
{ {
@ -326,10 +303,482 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows,
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2; const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
if (dst_x < dstCols) if (dst_x < dstCols)
dst[y * dstStep / 4 + dst_x] = round_uchar4_float4(sum); dst[y * dstStep / 4 + dst_x] = convert_uchar4_sat_rte(sum);
} }
} }
///////////////////////////////////////////////////////////////////////
////////////////////////// CV_16UC1 //////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel void pyrDown_C1_D2(__global ushort * srcData, int srcStep, int srcRows, int srcCols, __global ushort *dst, int dstStep, int dstCols)
{
const int x = get_global_id(0);
const int y = get_group_id(1);
__local float smem[256 + 4];
float sum;
const int src_y = 2*y;
const int last_row = srcRows - 1;
const int last_col = srcCols - 1;
if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2)
{
sum = 0.0625f * ((__global ushort*)((__global char*)srcData + (src_y - 2) * srcStep))[x];
sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + (src_y - 1) * srcStep))[x];
sum = sum + 0.375f * ((__global ushort*)((__global char*)srcData + (src_y ) * srcStep))[x];
sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + (src_y + 1) * srcStep))[x];
sum = sum + 0.0625f * ((__global ushort*)((__global char*)srcData + (src_y + 2) * srcStep))[x];
smem[2 + get_local_id(0)] = sum;
if (get_local_id(0) < 2)
{
const int left_x = x - 2;
sum = 0.0625f * ((__global ushort*)((__global char*)srcData + (src_y - 2) * srcStep))[left_x];
sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + (src_y - 1) * srcStep))[left_x];
sum = sum + 0.375f * ((__global ushort*)((__global char*)srcData + (src_y ) * srcStep))[left_x];
sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + (src_y + 1) * srcStep))[left_x];
sum = sum + 0.0625f * ((__global ushort*)((__global char*)srcData + (src_y + 2) * srcStep))[left_x];
smem[get_local_id(0)] = sum;
}
if (get_local_id(0) > 253)
{
const int right_x = x + 2;
sum = 0.0625f * ((__global ushort*)((__global char*)srcData + (src_y - 2) * srcStep))[right_x];
sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + (src_y - 1) * srcStep))[right_x];
sum = sum + 0.375f * ((__global ushort*)((__global char*)srcData + (src_y ) * srcStep))[right_x];
sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + (src_y + 1) * srcStep))[right_x];
sum = sum + 0.0625f * ((__global ushort*)((__global char*)srcData + (src_y + 2) * srcStep))[right_x];
smem[4 + get_local_id(0)] = sum;
}
}
else
{
int col = idx_col(x, last_col);
sum = 0.0625f * ((__global ushort*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col];
sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col];
sum = sum + 0.375f * ((__global ushort*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[col];
sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col];
sum = sum + 0.0625f * ((__global ushort*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col];
smem[2 + get_local_id(0)] = sum;
if (get_local_id(0) < 2)
{
const int left_x = x - 2;
col = idx_col(left_x, last_col);
sum = 0.0625f * ((__global ushort*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col];
sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col];
sum = sum + 0.375f * ((__global ushort*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[col];
sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col];
sum = sum + 0.0625f * ((__global ushort*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col];
smem[get_local_id(0)] = sum;
}
if (get_local_id(0) > 253)
{
const int right_x = x + 2;
col = idx_col(right_x, last_col);
sum = 0.0625f * ((__global ushort*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col];
sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col];
sum = sum + 0.375f * ((__global ushort*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[col];
sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col];
sum = sum + 0.0625f * ((__global ushort*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col];
smem[4 + get_local_id(0)] = sum;
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if (get_local_id(0) < 128)
{
const int tid2 = get_local_id(0) * 2;
sum = 0.0625f * smem[2 + tid2 - 2];
sum = sum + 0.25f * smem[2 + tid2 - 1];
sum = sum + 0.375f * smem[2 + tid2 ];
sum = sum + 0.25f * smem[2 + tid2 + 1];
sum = sum + 0.0625f * smem[2 + tid2 + 2];
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
if (dst_x < dstCols)
dst[y * dstStep / 2 + dst_x] = convert_ushort_sat_rte(sum);
}
}
///////////////////////////////////////////////////////////////////////
////////////////////////// CV_16UC4 //////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel void pyrDown_C4_D2(__global ushort4 * srcData, int srcStep, int srcRows, int srcCols, __global ushort4 *dst, int dstStep, int dstCols)
{
const int x = get_global_id(0);
const int y = get_group_id(1);
__local float4 smem[256 + 4];
float4 sum;
const int src_y = 2*y;
const int last_row = srcRows - 1;
const int last_col = srcCols - 1;
float4 co1 = 0.375f;
float4 co2 = 0.25f;
float4 co3 = 0.0625f;
if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2)
{
sum = co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[x]);
sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[x]);
sum = sum + co1 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[x]);
sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[x]);
sum = sum + co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[x]);
smem[2 + get_local_id(0)] = sum;
if (get_local_id(0) < 2)
{
const int left_x = x - 2;
sum = co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[left_x]);
sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[left_x]);
sum = sum + co1 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[left_x]);
sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[left_x]);
sum = sum + co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[left_x]);
smem[get_local_id(0)] = sum;
}
if (get_local_id(0) > 253)
{
const int right_x = x + 2;
sum = co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[right_x]);
sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[right_x]);
sum = sum + co1 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[right_x]);
sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[right_x]);
sum = sum + co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[right_x]);
smem[4 + get_local_id(0)] = sum;
}
}
else
{
int col = idx_col(x, last_col);
sum = co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]);
sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]);
sum = sum + co1 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col]);
sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]);
sum = sum + co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]);
smem[2 + get_local_id(0)] = sum;
if (get_local_id(0) < 2)
{
const int left_x = x - 2;
col = idx_col(left_x, last_col);
sum = co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]);
sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]);
sum = sum + co1 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col]);
sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]);
sum = sum + co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]);
smem[get_local_id(0)] = sum;
}
if (get_local_id(0) > 253)
{
const int right_x = x + 2;
col = idx_col(right_x, last_col);
sum = co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]);
sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]);
sum = sum + co1 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col]);
sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]);
sum = sum + co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]);
smem[4 + get_local_id(0)] = sum;
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if (get_local_id(0) < 128)
{
const int tid2 = get_local_id(0) * 2;
sum = co3 * smem[2 + tid2 - 2];
sum = sum + co2 * smem[2 + tid2 - 1];
sum = sum + co1 * smem[2 + tid2 ];
sum = sum + co2 * smem[2 + tid2 + 1];
sum = sum + co3 * smem[2 + tid2 + 2];
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
if (dst_x < dstCols)
dst[y * dstStep / 8 + dst_x] = convert_ushort4_sat_rte(sum);
}
}
///////////////////////////////////////////////////////////////////////
////////////////////////// CV_16SC1 //////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel void pyrDown_C1_D3(__global short * srcData, int srcStep, int srcRows, int srcCols, __global short *dst, int dstStep, int dstCols)
{
const int x = get_global_id(0);
const int y = get_group_id(1);
__local float smem[256 + 4];
float sum;
const int src_y = 2*y;
const int last_row = srcRows - 1;
const int last_col = srcCols - 1;
if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2)
{
sum = 0.0625f * ((__global short*)((__global char*)srcData + (src_y - 2) * srcStep))[x];
sum = sum + 0.25f * ((__global short*)((__global char*)srcData + (src_y - 1) * srcStep))[x];
sum = sum + 0.375f * ((__global short*)((__global char*)srcData + (src_y ) * srcStep))[x];
sum = sum + 0.25f * ((__global short*)((__global char*)srcData + (src_y + 1) * srcStep))[x];
sum = sum + 0.0625f * ((__global short*)((__global char*)srcData + (src_y + 2) * srcStep))[x];
smem[2 + get_local_id(0)] = sum;
if (get_local_id(0) < 2)
{
const int left_x = x - 2;
sum = 0.0625f * ((__global short*)((__global char*)srcData + (src_y - 2) * srcStep))[left_x];
sum = sum + 0.25f * ((__global short*)((__global char*)srcData + (src_y - 1) * srcStep))[left_x];
sum = sum + 0.375f * ((__global short*)((__global char*)srcData + (src_y ) * srcStep))[left_x];
sum = sum + 0.25f * ((__global short*)((__global char*)srcData + (src_y + 1) * srcStep))[left_x];
sum = sum + 0.0625f * ((__global short*)((__global char*)srcData + (src_y + 2) * srcStep))[left_x];
smem[get_local_id(0)] = sum;
}
if (get_local_id(0) > 253)
{
const int right_x = x + 2;
sum = 0.0625f * ((__global short*)((__global char*)srcData + (src_y - 2) * srcStep))[right_x];
sum = sum + 0.25f * ((__global short*)((__global char*)srcData + (src_y - 1) * srcStep))[right_x];
sum = sum + 0.375f * ((__global short*)((__global char*)srcData + (src_y ) * srcStep))[right_x];
sum = sum + 0.25f * ((__global short*)((__global char*)srcData + (src_y + 1) * srcStep))[right_x];
sum = sum + 0.0625f * ((__global short*)((__global char*)srcData + (src_y + 2) * srcStep))[right_x];
smem[4 + get_local_id(0)] = sum;
}
}
else
{
int col = idx_col(x, last_col);
sum = 0.0625f * ((__global short*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col];
sum = sum + 0.25f * ((__global short*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col];
sum = sum + 0.375f * ((__global short*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[col];
sum = sum + 0.25f * ((__global short*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col];
sum = sum + 0.0625f * ((__global short*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col];
smem[2 + get_local_id(0)] = sum;
if (get_local_id(0) < 2)
{
const int left_x = x - 2;
col = idx_col(left_x, last_col);
sum = 0.0625f * ((__global short*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col];
sum = sum + 0.25f * ((__global short*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col];
sum = sum + 0.375f * ((__global short*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[col];
sum = sum + 0.25f * ((__global short*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col];
sum = sum + 0.0625f * ((__global short*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col];
smem[get_local_id(0)] = sum;
}
if (get_local_id(0) > 253)
{
const int right_x = x + 2;
col = idx_col(right_x, last_col);
sum = 0.0625f * ((__global short*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col];
sum = sum + 0.25f * ((__global short*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col];
sum = sum + 0.375f * ((__global short*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[col];
sum = sum + 0.25f * ((__global short*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col];
sum = sum + 0.0625f * ((__global short*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col];
smem[4 + get_local_id(0)] = sum;
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if (get_local_id(0) < 128)
{
const int tid2 = get_local_id(0) * 2;
sum = 0.0625f * smem[2 + tid2 - 2];
sum = sum + 0.25f * smem[2 + tid2 - 1];
sum = sum + 0.375f * smem[2 + tid2 ];
sum = sum + 0.25f * smem[2 + tid2 + 1];
sum = sum + 0.0625f * smem[2 + tid2 + 2];
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
if (dst_x < dstCols)
dst[y * dstStep / 2 + dst_x] = convert_short_sat_rte(sum);
}
}
///////////////////////////////////////////////////////////////////////
////////////////////////// CV_16SC4 //////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel void pyrDown_C4_D3(__global short4 * srcData, int srcStep, int srcRows, int srcCols, __global short4 *dst, int dstStep, int dstCols)
{
const int x = get_global_id(0);
const int y = get_group_id(1);
__local float4 smem[256 + 4];
float4 sum;
const int src_y = 2*y;
const int last_row = srcRows - 1;
const int last_col = srcCols - 1;
float4 co1 = 0.375f;
float4 co2 = 0.25f;
float4 co3 = 0.0625f;
if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2)
{
sum = co3 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[x]);
sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[x]);
sum = sum + co1 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[x]);
sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[x]);
sum = sum + co3 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[x]);
smem[2 + get_local_id(0)] = sum;
if (get_local_id(0) < 2)
{
const int left_x = x - 2;
sum = co3 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[left_x]);
sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[left_x]);
sum = sum + co1 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[left_x]);
sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[left_x]);
sum = sum + co3 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[left_x]);
smem[get_local_id(0)] = sum;
}
if (get_local_id(0) > 253)
{
const int right_x = x + 2;
sum = co3 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[right_x]);
sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[right_x]);
sum = sum + co1 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[right_x]);
sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[right_x]);
sum = sum + co3 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[right_x]);
smem[4 + get_local_id(0)] = sum;
}
}
else
{
int col = idx_col(x, last_col);
sum = co3 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]);
sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]);
sum = sum + co1 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col]);
sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]);
sum = sum + co3 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]);
smem[2 + get_local_id(0)] = sum;
if (get_local_id(0) < 2)
{
const int left_x = x - 2;
col = idx_col(left_x, last_col);
sum = co3 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]);
sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]);
sum = sum + co1 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col]);
sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]);
sum = sum + co3 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]);
smem[get_local_id(0)] = sum;
}
if (get_local_id(0) > 253)
{
const int right_x = x + 2;
col = idx_col(right_x, last_col);
sum = co3 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]);
sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]);
sum = sum + co1 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col]);
sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]);
sum = sum + co3 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]);
smem[4 + get_local_id(0)] = sum;
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if (get_local_id(0) < 128)
{
const int tid2 = get_local_id(0) * 2;
sum = co3 * smem[2 + tid2 - 2];
sum = sum + co2 * smem[2 + tid2 - 1];
sum = sum + co1 * smem[2 + tid2 ];
sum = sum + co2 * smem[2 + tid2 + 1];
sum = sum + co3 * smem[2 + tid2 + 2];
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
if (dst_x < dstCols)
dst[y * dstStep / 8 + dst_x] = convert_short4_sat_rte(sum);
}
}
///////////////////////////////////////////////////////////////////////
////////////////////////// CV_32FC1 //////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel void pyrDown_C1_D5(__global float * srcData, int srcStep, int srcRows, int srcCols, __global float *dst, int dstStep, int dstCols) __kernel void pyrDown_C1_D5(__global float * srcData, int srcStep, int srcRows, int srcCols, __global float *dst, int dstStep, int dstCols)
{ {
const int x = get_global_id(0); const int x = get_global_id(0);
@ -441,6 +890,10 @@ __kernel void pyrDown_C1_D5(__global float * srcData, int srcStep, int srcRows,
} }
} }
///////////////////////////////////////////////////////////////////////
////////////////////////// CV_32FC4 //////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcRows, int srcCols, __global float4 *dst, int dstStep, int dstCols) __kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcRows, int srcCols, __global float4 *dst, int dstStep, int dstCols)
{ {
const int x = get_global_id(0); const int x = get_global_id(0);
@ -454,9 +907,9 @@ __kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcRows,
const int last_row = srcRows - 1; const int last_row = srcRows - 1;
const int last_col = srcCols - 1; const int last_col = srcCols - 1;
float4 co1 = 0.375f;//(float4)(0.375f, 0.375f, 0.375f, 0.375f); float4 co1 = 0.375f;
float4 co2 = 0.25f;//(float4)(0.25f, 0.25f, 0.25f, 0.25f); float4 co2 = 0.25f;
float4 co3 = 0.0625f;//(float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f); float4 co3 = 0.0625f;
if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2) if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2)
{ {

View File

@ -46,15 +46,15 @@
// //
//M*/ //M*/
//#pragma OPENCL EXTENSION cl_amd_printf : enable
uchar get_valid_uchar(float data) uchar get_valid_uchar(float data)
{ {
return (uchar)(data <= 255 ? data : data > 0 ? 255 : 0); return (uchar)(data <= 255 ? data : data > 0 ? 255 : 0);
} }
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
////////////////////////// CV_8UC1 ////////////////////////////////// ////////////////////////// CV_8UC1 //////////////////////////////////
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
__kernel void pyrUp_C1_D0(__global uchar* src, __global uchar* dst, __kernel void pyrUp_C1_D0(__global uchar* src, __global uchar* dst,
int srcRows, int dstRows, int srcCols, int dstCols, int srcRows, int dstRows, int srcCols, int dstCols,
int srcOffset, int dstOffset, int srcStep, int dstStep) int srcOffset, int dstOffset, int srcStep, int dstStep)
@ -144,12 +144,12 @@ __kernel void pyrUp_C1_D0(__global uchar* src,__global uchar* dst,
if ((x < dstCols) && (y < dstRows)) if ((x < dstCols) && (y < dstRows))
dst[x + y * dstStep] = convert_uchar_sat_rte(4.0f * sum); dst[x + y * dstStep] = convert_uchar_sat_rte(4.0f * sum);
} }
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
////////////////////////// CV_16UC1 ///////////////////////////////// ////////////////////////// CV_16UC1 /////////////////////////////////
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
__kernel void pyrUp_C1_D2(__global ushort* src, __global ushort* dst, __kernel void pyrUp_C1_D2(__global ushort* src, __global ushort* dst,
int srcRows, int dstRows, int srcCols, int dstCols, int srcRows, int dstRows, int srcCols, int dstCols,
int srcOffset, int dstOffset, int srcStep, int dstStep) int srcOffset, int dstOffset, int srcStep, int dstStep)
@ -245,13 +245,113 @@ __kernel void pyrUp_C1_D2(__global ushort* src,__global ushort* dst,
sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)]; sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)];
if ((x < dstCols) && (y < dstRows)) if ((x < dstCols) && (y < dstRows))
dst[x + y * dstStep] = convert_short_sat_rte(4.0f * sum); dst[x + y * dstStep] = convert_ushort_sat_rte(4.0f * sum);
}
///////////////////////////////////////////////////////////////////////
////////////////////////// CV_16SC1 /////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel void pyrUp_C1_D3(__global short* src, __global short* dst,
int srcRows, int dstRows, int srcCols, int dstCols,
int srcOffset, int dstOffset, int srcStep, int dstStep)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
__local float s_srcPatch[10][10];
__local float s_dstPatch[20][16];
srcStep = srcStep >> 1;
dstStep = dstStep >> 1;
srcOffset = srcOffset >> 1;
dstOffset = dstOffset >> 1;
if( get_local_id(0) < 10 && get_local_id(1) < 10 )
{
int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1;
int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1;
srcx = abs(srcx);
srcx = min(srcCols - 1,srcx);
srcy = abs(srcy);
srcy = min(srcRows -1 ,srcy);
s_srcPatch[get_local_id(1)][get_local_id(0)] = (float)(src[srcx + srcy * srcStep]);
}
barrier(CLK_LOCAL_MEM_FENCE);
float sum = 0;
const int evenFlag = (int)((get_local_id(0) & 1) == 0);
const int oddFlag = (int)((get_local_id(0) & 1) != 0);
const bool eveny = ((get_local_id(1) & 1) == 0);
const int tidx = get_local_id(0);
if (eveny)
{
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum;
if (get_local_id(1) < 2)
{
sum = 0;
if (eveny)
{
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[get_local_id(1)][get_local_id(0)] = sum;
}
if (get_local_id(1) > 13)
{
sum = 0;
if (eveny)
{
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum;
}
barrier(CLK_LOCAL_MEM_FENCE);
sum = 0;
const int tidy = get_local_id(1);
sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][get_local_id(0)];
sum = sum + 0.25f * s_dstPatch[2 + tidy - 1][get_local_id(0)];
sum = sum + 0.375f * s_dstPatch[2 + tidy ][get_local_id(0)];
sum = sum + 0.25f * s_dstPatch[2 + tidy + 1][get_local_id(0)];
sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)];
if ((x < dstCols) && (y < dstRows))
dst[x + y * dstStep] = convert_short_sat_rte(4.0f * sum);
} }
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
////////////////////////// CV_32FC1 ///////////////////////////////// ////////////////////////// CV_32FC1 /////////////////////////////////
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
__kernel void pyrUp_C1_D5(__global float* src, __global float* dst, __kernel void pyrUp_C1_D5(__global float* src, __global float* dst,
int srcRows, int dstRows, int srcCols, int dstCols, int srcRows, int dstRows, int srcCols, int dstCols,
int srcOffset, int dstOffset, int srcStep, int dstStep) int srcOffset, int dstOffset, int srcStep, int dstStep)
@ -346,12 +446,12 @@ __kernel void pyrUp_C1_D5(__global float* src,__global float* dst,
if ((x < dstCols) && (y < dstRows)) if ((x < dstCols) && (y < dstRows))
dst[x + y * dstStep] = (float)(4.0f * sum); dst[x + y * dstStep] = (float)(4.0f * sum);
} }
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
////////////////////////// CV_8UC4 ////////////////////////////////// ////////////////////////// CV_8UC4 //////////////////////////////////
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
__kernel void pyrUp_C4_D0(__global uchar4* src, __global uchar4* dst, __kernel void pyrUp_C4_D0(__global uchar4* src, __global uchar4* dst,
int srcRows, int dstRows, int srcCols, int dstCols, int srcRows, int dstRows, int srcCols, int dstCols,
int srcOffset, int dstOffset, int srcStep, int dstStep) int srcOffset, int dstOffset, int srcStep, int dstStep)
@ -451,14 +551,13 @@ __kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst,
sum = sum + co3 * s_dstPatch[2 + tidy + 2][tidx]; sum = sum + co3 * s_dstPatch[2 + tidy + 2][tidx];
if ((x < dstCols) && (y < dstRows)) if ((x < dstCols) && (y < dstRows))
{
dst[x + y * dstStep] = convert_uchar4_sat_rte(4.0f * sum); dst[x + y * dstStep] = convert_uchar4_sat_rte(4.0f * sum);
} }
}
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
////////////////////////// CV_16UC4 ////////////////////////////////// ////////////////////////// CV_16UC4 //////////////////////////////////
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
__kernel void pyrUp_C4_D2(__global ushort4* src, __global ushort4* dst, __kernel void pyrUp_C4_D2(__global ushort4* src, __global ushort4* dst,
int srcRows, int dstRows, int srcCols, int dstCols, int srcRows, int dstRows, int srcCols, int dstCols,
int srcOffset, int dstOffset, int srcStep, int dstStep) int srcOffset, int dstOffset, int srcStep, int dstStep)
@ -560,14 +659,120 @@ __kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst,
sum = sum + co3 * s_dstPatch[2 + tidy + 2][get_local_id(0)]; sum = sum + co3 * s_dstPatch[2 + tidy + 2][get_local_id(0)];
if ((x < dstCols) && (y < dstRows)) if ((x < dstCols) && (y < dstRows))
{
dst[x + y * dstStep] = convert_ushort4_sat_rte(4.0f * sum); dst[x + y * dstStep] = convert_ushort4_sat_rte(4.0f * sum);
} }
///////////////////////////////////////////////////////////////////////
////////////////////////// CV_16SC4 //////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel void pyrUp_C4_D3(__global short4* src, __global short4* dst,
int srcRows, int dstRows, int srcCols, int dstCols,
int srcOffset, int dstOffset, int srcStep, int dstStep)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
__local float4 s_srcPatch[10][10];
__local float4 s_dstPatch[20][16];
srcOffset >>= 3;
dstOffset >>= 3;
srcStep >>= 3;
dstStep >>= 3;
if( get_local_id(0) < 10 && get_local_id(1) < 10 )
{
int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1;
int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1;
srcx = abs(srcx);
srcx = min(srcCols - 1,srcx);
srcy = abs(srcy);
srcy = min(srcRows -1 ,srcy);
s_srcPatch[get_local_id(1)][get_local_id(0)] = convert_float4(src[srcx + srcy * srcStep]);
}
barrier(CLK_LOCAL_MEM_FENCE);
float4 sum = (float4)(0,0,0,0);
const float4 evenFlag = (float4)((get_local_id(0) & 1) == 0);
const float4 oddFlag = (float4)((get_local_id(0) & 1) != 0);
const bool eveny = ((get_local_id(1) & 1) == 0);
const int tidx = get_local_id(0);
float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f);
float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f);
float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f);
if(eveny)
{
sum = sum + ( evenFlag* co3 ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * co2 ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)];
sum = sum + ( evenFlag* co1 ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * co2 ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)];
sum = sum + ( evenFlag* co3 ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum;
if (get_local_id(1) < 2)
{
sum = 0;
if (eveny)
{
sum = sum + (evenFlag * co3 ) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
sum = sum + (oddFlag * co2 ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * co1 ) * s_srcPatch[0][1 + ((tidx ) >> 1)];
sum = sum + (oddFlag * co2 ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * co3 ) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[get_local_id(1)][get_local_id(0)] = sum;
}
if (get_local_id(1) > 13)
{
sum = 0;
if (eveny)
{
sum = sum + (evenFlag * co3) * s_srcPatch[9][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * co2) * s_srcPatch[9][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * co1) * s_srcPatch[9][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * co2) * s_srcPatch[9][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * co3) * s_srcPatch[9][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum;
}
barrier(CLK_LOCAL_MEM_FENCE);
sum = 0;
const int tidy = get_local_id(1);
sum = sum + co3 * s_dstPatch[2 + tidy - 2][get_local_id(0)];
sum = sum + co2 * s_dstPatch[2 + tidy - 1][get_local_id(0)];
sum = sum + co1 * s_dstPatch[2 + tidy ][get_local_id(0)];
sum = sum + co2 * s_dstPatch[2 + tidy + 1][get_local_id(0)];
sum = sum + co3 * s_dstPatch[2 + tidy + 2][get_local_id(0)];
if ((x < dstCols) && (y < dstRows))
dst[x + y * dstStep] = convert_short4_sat_rte(4.0f * sum);
} }
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
////////////////////////// CV_32FC4 ////////////////////////////////// ////////////////////////// CV_32FC4 //////////////////////////////////
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
__kernel void pyrUp_C4_D5(__global float4* src, __global float4* dst, __kernel void pyrUp_C4_D5(__global float4* src, __global float4* dst,
int srcRows, int dstRows, int srcCols, int dstCols, int srcRows, int dstRows, int srcCols, int dstCols,
int srcOffset, int dstOffset, int srcStep, int dstStep) int srcOffset, int dstOffset, int srcStep, int dstStep)
@ -667,7 +872,5 @@ __kernel void pyrUp_C4_D5(__global float4* src,__global float4* dst,
sum = sum + co3 * s_dstPatch[2 + tidy + 2][tidx]; sum = sum + co3 * s_dstPatch[2 + tidy + 2][tidx];
if ((x < dstCols) && (y < dstRows)) if ((x < dstCols) && (y < dstRows))
{
dst[x + y * dstStep] = 4.0f * sum; dst[x + y * dstStep] = 4.0f * sum;
} }
}

View File

@ -73,24 +73,11 @@ static void pyrdown_run(const oclMat &src, const oclMat &dst)
CV_Assert(src.depth() != CV_8S); CV_Assert(src.depth() != CV_8S);
Context *clCxt = src.clCxt; Context *clCxt = src.clCxt;
//int channels = dst.channels();
//int depth = dst.depth();
string kernelName = "pyrDown"; string kernelName = "pyrDown";
//int vector_lengths[4][7] = {{4, 0, 4, 4, 1, 1, 1},
// {4, 0, 4, 4, 1, 1, 1},
// {4, 0, 4, 4, 1, 1, 1},
// {4, 0, 4, 4, 1, 1, 1}
//};
//size_t vector_length = vector_lengths[channels-1][depth];
//int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1);
size_t localThreads[3] = { 256, 1, 1 }; size_t localThreads[3] = { 256, 1, 1 };
size_t globalThreads[3] = { src.cols, dst.rows, 1}; size_t globalThreads[3] = { src.cols, dst.rows, 1};
//int dst_step1 = dst.cols * dst.elemSize();
vector<pair<size_t , const void *> > args; vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data )); args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step )); args.push_back( make_pair( sizeof(cl_int), (void *)&src.step ));
@ -107,7 +94,9 @@ static void pyrdown_run(const oclMat &src, const oclMat &dst)
void cv::ocl::pyrDown(const oclMat &src, oclMat &dst) void cv::ocl::pyrDown(const oclMat &src, oclMat &dst)
{ {
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4); int depth = src.depth(), channels = src.channels();
CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_16S || depth == CV_32F);
CV_Assert(channels == 1 || channels == 3 || channels == 4);
dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type()); dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());

View File

@ -61,6 +61,11 @@ namespace cv
extern const char *pyr_up; extern const char *pyr_up;
void pyrUp(const cv::ocl::oclMat &src, cv::ocl::oclMat &dst) void pyrUp(const cv::ocl::oclMat &src, cv::ocl::oclMat &dst)
{ {
int depth = src.depth(), channels = src.channels();
CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_16S || depth == CV_32F);
CV_Assert(channels == 1 || channels == 3 || channels == 4);
dst.create(src.rows * 2, src.cols * 2, src.type()); dst.create(src.rows * 2, src.cols * 2, src.type());
Context *clCxt = src.clCxt; Context *clCxt = src.clCxt;

View File

@ -57,60 +57,63 @@ using namespace std;
PARAM_TEST_CASE(PyrBase, MatType, int) PARAM_TEST_CASE(PyrBase, MatType, int)
{ {
int type; int depth;
int channels; int channels;
Mat dst_cpu; Mat dst_cpu;
oclMat gdst; oclMat gdst;
virtual void SetUp() virtual void SetUp()
{ {
type = GET_PARAM(0); depth = GET_PARAM(0);
channels = GET_PARAM(1); channels = GET_PARAM(1);
} }
}; };
/////////////////////// PyrDown ////////////////////////// /////////////////////// PyrDown //////////////////////////
struct PyrDown : PyrBase {};
typedef PyrBase PyrDown;
TEST_P(PyrDown, Mat) TEST_P(PyrDown, Mat)
{ {
for (int j = 0; j < LOOP_TIMES; j++) for (int j = 0; j < LOOP_TIMES; j++)
{ {
Size size(MWIDTH, MHEIGHT); Size size(MWIDTH, MHEIGHT);
Mat src = randomMat(size, CV_MAKETYPE(type, channels)); Mat src = randomMat(size, CV_MAKETYPE(depth, channels));
oclMat gsrc(src); oclMat gsrc(src);
pyrDown(src, dst_cpu); pyrDown(src, dst_cpu);
pyrDown(gsrc, gdst); pyrDown(gsrc, gdst);
EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), type == CV_32F ? 1e-4f : 1.0f); EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), depth == CV_32F ? 1e-4f : 1.0f);
} }
} }
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrDown, Combine( INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrDown, Combine(
Values(CV_8U, CV_32F), Values(1, 3, 4))); Values(CV_8U, CV_16U, CV_16S, CV_32F),
Values(1, 3, 4)));
/////////////////////// PyrUp ////////////////////////// /////////////////////// PyrUp //////////////////////////
struct PyrUp : PyrBase {}; typedef PyrBase PyrUp;
TEST_P(PyrUp, Accuracy) TEST_P(PyrUp, Accuracy)
{ {
for (int j = 0; j < LOOP_TIMES; j++) for (int j = 0; j < LOOP_TIMES; j++)
{ {
Size size(MWIDTH, MHEIGHT); Size size(MWIDTH, MHEIGHT);
Mat src = randomMat(size, CV_MAKETYPE(type, channels)); Mat src = randomMat(size, CV_MAKETYPE(depth, channels));
oclMat gsrc(src); oclMat gsrc(src);
pyrUp(src, dst_cpu); pyrUp(src, dst_cpu);
pyrUp(gsrc, gdst); pyrUp(gsrc, gdst);
EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), (type == CV_32F ? 1e-4f : 1.0)); EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), (depth == CV_32F ? 1e-4f : 1.0));
} }
} }
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrUp, testing::Combine( INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrUp, Combine(
Values(CV_8U, CV_32F), Values(1, 3, 4))); Values(CV_8U, CV_16U, CV_16S, CV_32F),
Values(1, 3, 4)));
#endif // HAVE_OPENCL #endif // HAVE_OPENCL