Review comments

This commit is contained in:
unknown 2014-04-14 16:10:11 -07:00
parent d2b4ee1e9b
commit 36d33dd6c2
2 changed files with 18 additions and 40 deletions

View File

@ -55,6 +55,7 @@
#endif #endif
//Read pixels as integers //Read pixels as integers
// Intel Device - Read Pixels as floats
__kernel void bilateral(__global const uchar * src, int src_step, int src_offset, __kernel void bilateral(__global const uchar * src, int src_step, int src_offset,
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
__constant float * space_weight, __constant int * space_ofs) __constant float * space_weight, __constant int * space_ofs)
@ -69,51 +70,31 @@ __kernel void bilateral(__global const uchar * src, int src_step, int src_offset
float_t sum = (float_t)(0.0f); float_t sum = (float_t)(0.0f);
float wsum = 0.0f; float wsum = 0.0f;
#ifdef INTEL_DEVICE
float_t val0 = convert_float_t(loadpix(src + src_index));
#else
int_t val0 = convert_int_t(loadpix(src + src_index)); int_t val0 = convert_int_t(loadpix(src + src_index));
#endif
#pragma unroll #pragma unroll
for (int k = 0; k < maxk; k++ ) for (int k = 0; k < maxk; k++ )
{ {
int_t val = convert_int_t(loadpix(src + src_index + space_ofs[k])); #ifdef INTEL_DEVICE
uint diff = (uint)SUM(abs(val - val0)); float_t val = convert_float_t(loadpix(src + src_index + space_ofs[k]));
float w = space_weight[k] * native_exp((float)(diff * diff * as_float(gauss_color_coeff))); float diff = SUM(fabs(val - val0));
#else
int_t val = convert_int_t(loadpix(src + src_index + space_ofs[k]));
int diff = SUM(abs(val - val0));
#endif
float w = space_weight[k] * native_exp((float)(diff * diff * gauss_color_coeff));
sum += convert_float_t(val) * (float_t)(w); sum += convert_float_t(val) * (float_t)(w);
wsum += w; wsum += w;
} }
storepix(convert_uchar_t(sum / (float_t)(wsum)), dst + dst_index);
}
}
//Read pixels as floats
__kernel void bilateral_float(__global const uchar * src, int src_step, int src_offset,
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
__constant float * space_weight, __constant int * space_ofs)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (y < dst_rows && x < dst_cols)
{
int src_index = mad24(y + radius, src_step, mad24(x + radius, TSIZE, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));
float_t sum = (float_t)(0.0f);
float wsum = 0.0f;
float_t val0 = convert_float_t(loadpix(src + src_index));
for (int k = 0; k < maxk; k++ )
{
float_t val = convert_float_t(loadpix(src + src_index + space_ofs[k]));
float i = SUM(fabs(val - val0));
float w = space_weight[k] * native_exp(i * i * as_float(gauss_color_coeff));
sum += val * w;
wsum += w;
}
storepix(convert_uchar_t(sum / (float_t)(wsum)), dst + dst_index); storepix(convert_uchar_t(sum / (float_t)(wsum)), dst + dst_index);
} }
} }
#ifdef INTEL_DEVICE
#if cn == 1
//for single channgel x4 sized images. //for single channgel x4 sized images.
__kernel void bilateral_float4(__global const uchar * src, int src_step, int src_offset, __kernel void bilateral_float4(__global const uchar * src, int src_step, int src_offset,
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
@ -131,8 +112,7 @@ __kernel void bilateral_float4(__global const uchar * src, int src_step, int src
for (int k = 0; k < maxk; k++ ) for (int k = 0; k < maxk; k++ )
{ {
float4 val = convert_float4(vload4(0, src + src_index + space_ofs[k])); float4 val = convert_float4(vload4(0, src + src_index + space_ofs[k]));
float spacew = space_weight[k]; float4 w = space_weight[k] * native_exp((val - val0) * (val - val0) * gauss_color_coeff);
float4 w = spacew * native_exp((val - val0) * (val - val0) * as_float(gauss_color_coeff));
sum += val * w; sum += val * w;
wsum += w; wsum += w;
} }
@ -140,3 +120,5 @@ __kernel void bilateral_float4(__global const uchar * src, int src_step, int src
vstore4(convert_uchar4_rtz(sum), 0, dst + dst_index); vstore4(convert_uchar4_rtz(sum), 0, dst + dst_index);
} }
} }
#endif
#endif

View File

@ -2370,10 +2370,6 @@ static bool ocl_bilateralFilter_8u(InputArray _src, OutputArray _dst, int d,
kernelName = "bilateral_float4"; kernelName = "bilateral_float4";
sizeDiv = 4; sizeDiv = 4;
} }
else
{
kernelName = "bilateral_float";
}
} }
ocl::Kernel k(kernelName.c_str(), ocl::imgproc::bilateral_oclsrc, ocl::Kernel k(kernelName.c_str(), ocl::imgproc::bilateral_oclsrc,
format("-D radius=%d -D maxk=%d -D cn=%d -D int_t=%s -D uint_t=uint%s -D convert_int_t=%s" format("-D radius=%d -D maxk=%d -D cn=%d -D int_t=%s -D uint_t=uint%s -D convert_int_t=%s"