Fix ocl::pyrUp
Use predefined OpenCL function to convert integers to floating points. This is more accurate than before as it enables: 1. saturate cast 2. customized rounding
This commit is contained in:
parent
2aa5f1bfd1
commit
df3997b108
@ -18,6 +18,7 @@
|
||||
// Zhang Chunpeng chunpeng@multicorewareinc.com
|
||||
// Dachuan Zhao, dachuan@multicorewareinc.com
|
||||
// Yao Wang, yao@multicorewareinc.com
|
||||
// Peng Xiao, pengxiao@outlook.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
@ -47,7 +48,7 @@
|
||||
|
||||
//#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||
|
||||
uchar get_valid_uchar(uchar data)
|
||||
uchar get_valid_uchar(float data)
|
||||
{
|
||||
return (uchar)(data <= 255 ? data : data > 0 ? 255 : 0);
|
||||
}
|
||||
@ -142,7 +143,7 @@ __kernel void pyrUp_C1_D0(__global uchar* src,__global uchar* dst,
|
||||
sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][tidx];
|
||||
|
||||
if ((x < dstCols) && (y < dstRows))
|
||||
dst[x + y * dstStep] = (float)(4.0f * sum);
|
||||
dst[x + y * dstStep] = convert_uchar_sat_rte(4.0f * sum);
|
||||
|
||||
}
|
||||
|
||||
@ -244,7 +245,7 @@ __kernel void pyrUp_C1_D2(__global ushort* src,__global ushort* dst,
|
||||
sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)];
|
||||
|
||||
if ((x < dstCols) && (y < dstRows))
|
||||
dst[x + y * dstStep] = (float)(4.0f * sum);
|
||||
dst[x + y * dstStep] = convert_short_sat_rte(4.0f * sum);
|
||||
|
||||
}
|
||||
|
||||
@ -351,31 +352,6 @@ __kernel void pyrUp_C1_D5(__global float* src,__global float* dst,
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
////////////////////////// CV_8UC4 //////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
float4 covert_uchar4_to_float4(uchar4 data)
|
||||
{
|
||||
float4 f4Data = {0,0,0,0};
|
||||
|
||||
f4Data.x = (float)data.x;
|
||||
f4Data.y = (float)data.y;
|
||||
f4Data.z = (float)data.z;
|
||||
f4Data.w = (float)data.w;
|
||||
|
||||
return f4Data;
|
||||
}
|
||||
|
||||
|
||||
uchar4 convert_float4_to_uchar4(float4 data)
|
||||
{
|
||||
uchar4 u4Data;
|
||||
|
||||
u4Data.x = get_valid_uchar(data.x);
|
||||
u4Data.y = get_valid_uchar(data.y);
|
||||
u4Data.z = get_valid_uchar(data.z);
|
||||
u4Data.w = get_valid_uchar(data.w);
|
||||
|
||||
return u4Data;
|
||||
}
|
||||
|
||||
__kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst,
|
||||
int srcRows,int dstRows,int srcCols,int dstCols,
|
||||
int srcOffset,int dstOffset,int srcStep,int dstStep)
|
||||
@ -406,7 +382,7 @@ __kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst,
|
||||
srcy = abs(srcy);
|
||||
srcy = min(srcRows -1 ,srcy);
|
||||
|
||||
s_srcPatch[tidy][tidx] = covert_uchar4_to_float4(src[srcx + srcy * srcStep]);
|
||||
s_srcPatch[tidy][tidx] = convert_float4(src[srcx + srcy * srcStep]);
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
@ -476,38 +452,12 @@ __kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst,
|
||||
|
||||
if ((x < dstCols) && (y < dstRows))
|
||||
{
|
||||
dst[x + y * dstStep] = convert_float4_to_uchar4(4.0f * sum);
|
||||
dst[x + y * dstStep] = convert_uchar4_sat_rte(4.0f * sum);
|
||||
}
|
||||
}
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
////////////////////////// CV_16UC4 //////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
float4 covert_ushort4_to_float4(ushort4 data)
|
||||
{
|
||||
float4 f4Data = {0,0,0,0};
|
||||
|
||||
f4Data.x = (float)data.x;
|
||||
f4Data.y = (float)data.y;
|
||||
f4Data.z = (float)data.z;
|
||||
f4Data.w = (float)data.w;
|
||||
|
||||
return f4Data;
|
||||
}
|
||||
|
||||
|
||||
ushort4 convert_float4_to_ushort4(float4 data)
|
||||
{
|
||||
ushort4 u4Data;
|
||||
|
||||
u4Data.x = (float)data.x;
|
||||
u4Data.y = (float)data.y;
|
||||
u4Data.z = (float)data.z;
|
||||
u4Data.w = (float)data.w;
|
||||
|
||||
return u4Data;
|
||||
}
|
||||
|
||||
|
||||
__kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst,
|
||||
int srcRows,int dstRows,int srcCols,int dstCols,
|
||||
int srcOffset,int dstOffset,int srcStep,int dstStep)
|
||||
@ -535,7 +485,7 @@ __kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst,
|
||||
srcy = abs(srcy);
|
||||
srcy = min(srcRows -1 ,srcy);
|
||||
|
||||
s_srcPatch[get_local_id(1)][get_local_id(0)] = covert_ushort4_to_float4(src[srcx + srcy * srcStep]);
|
||||
s_srcPatch[get_local_id(1)][get_local_id(0)] = convert_float4(src[srcx + srcy * srcStep]);
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
@ -570,11 +520,11 @@ __kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst,
|
||||
|
||||
if (eveny)
|
||||
{
|
||||
sum = sum + (evenFlag * co3) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + ( oddFlag * co2 ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + (evenFlag * co3 ) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + (oddFlag * co2 ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + (evenFlag * co1 ) * s_srcPatch[0][1 + ((tidx ) >> 1)];
|
||||
sum = sum + ( oddFlag * co2 ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + (evenFlag * co3) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
|
||||
sum = sum + (oddFlag * co2 ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + (evenFlag * co3 ) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
|
||||
}
|
||||
|
||||
s_dstPatch[get_local_id(1)][get_local_id(0)] = sum;
|
||||
@ -610,7 +560,7 @@ __kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst,
|
||||
|
||||
if ((x < dstCols) && (y < dstRows))
|
||||
{
|
||||
dst[x + y * dstStep] = convert_float4_to_ushort4(4.0f * sum);
|
||||
dst[x + y * dstStep] = convert_ushort4_sat_rte(4.0f * sum);
|
||||
}
|
||||
}
|
||||
|
||||
@ -681,11 +631,11 @@ __kernel void pyrUp_C4_D5(__global float4* src,__global float4* dst,
|
||||
|
||||
if (eveny)
|
||||
{
|
||||
sum = sum + (evenFlag * co3) * s_srcPatch[lsizey-16][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + ( oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + (evenFlag * co3 ) * s_srcPatch[lsizey-16][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + (oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + (evenFlag * co1 ) * s_srcPatch[lsizey-16][1 + ((tidx ) >> 1)];
|
||||
sum = sum + ( oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + (evenFlag * co3) * s_srcPatch[lsizey-16][1 + ((tidx + 2) >> 1)];
|
||||
sum = sum + ( oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + (evenFlag * co3 ) * s_srcPatch[lsizey-16][1 + ((tidx + 2) >> 1)];
|
||||
}
|
||||
|
||||
s_dstPatch[tidy][tidx] = sum;
|
||||
|
Loading…
x
Reference in New Issue
Block a user