Added optimized loading to YUV2RGB_422 kernel
This commit is contained in:
parent
1466621f99
commit
643c906f3d
@ -5060,7 +5060,8 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
|
|||||||
CV_Assert( scn == 2 && depth == CV_8U );
|
CV_Assert( scn == 2 && depth == CV_8U );
|
||||||
|
|
||||||
k.create("YUV2RGB_422", ocl::imgproc::cvtcolor_oclsrc,
|
k.create("YUV2RGB_422", ocl::imgproc::cvtcolor_oclsrc,
|
||||||
opts + format("-D dcn=%d -D bidx=%d -D uidx=%d -D yidx=%d", dcn, bidx, uidx, yidx));
|
opts + format("-D dcn=%d -D bidx=%d -D uidx=%d -D yidx=%d%s", dcn, bidx, uidx, yidx,
|
||||||
|
src.offset % 4 == 0 && src.step % 4 == 0 ? " -D USE_OPTIMIZED_LOAD" : ""));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case COLOR_BGR2YCrCb:
|
case COLOR_BGR2YCrCb:
|
||||||
|
@ -573,22 +573,33 @@ __kernel void YUV2RGB_422(__global const uchar* srcptr, int src_step, int src_of
|
|||||||
{
|
{
|
||||||
if (y < rows )
|
if (y < rows )
|
||||||
{
|
{
|
||||||
|
__constant float* coeffs = c_YUV2RGBCoeffs_420;
|
||||||
|
|
||||||
|
#ifndef USE_OPTIMIZED_LOAD
|
||||||
float U = ((float) src[uidx]) - HALF_MAX;
|
float U = ((float) src[uidx]) - HALF_MAX;
|
||||||
float V = ((float) src[(2 + uidx) % 4]) - HALF_MAX;
|
float V = ((float) src[(2 + uidx) % 4]) - HALF_MAX;
|
||||||
|
float y00 = max(0.f, ((float) src[yidx]) - 16.f) * coeffs[0];
|
||||||
|
float y01 = max(0.f, ((float) src[yidx + 2]) - 16.f) * coeffs[0];
|
||||||
|
#else
|
||||||
|
int load_src = *((__global int*) src);
|
||||||
|
float vec_src[4] = { load_src & 0xff, (load_src >> 8) & 0xff, (load_src >> 16) & 0xff, (load_src >> 24) & 0xff};
|
||||||
|
float U = vec_src[uidx] - HALF_MAX;
|
||||||
|
float V = vec_src[(2 + uidx) % 4] - HALF_MAX;
|
||||||
|
float y00 = max(0.f, vec_src[yidx] - 16.f) * coeffs[0];
|
||||||
|
float y01 = max(0.f, vec_src[yidx + 2] - 16.f) * coeffs[0];
|
||||||
|
#endif
|
||||||
|
|
||||||
__constant float* coeffs = c_YUV2RGBCoeffs_420;
|
|
||||||
float ruv = fma(coeffs[4], V, 0.5f);
|
float ruv = fma(coeffs[4], V, 0.5f);
|
||||||
float guv = fma(coeffs[3], V, fma(coeffs[2], U, 0.5f));
|
float guv = fma(coeffs[3], V, fma(coeffs[2], U, 0.5f));
|
||||||
float buv = fma(coeffs[1], U, 0.5f);
|
float buv = fma(coeffs[1], U, 0.5f);
|
||||||
|
|
||||||
float y00 = max(0.f, ((float) src[yidx]) - 16.f) * coeffs[0];
|
|
||||||
dst[2 - bidx] = convert_uchar_sat(y00 + ruv);
|
dst[2 - bidx] = convert_uchar_sat(y00 + ruv);
|
||||||
dst[1] = convert_uchar_sat(y00 + guv);
|
dst[1] = convert_uchar_sat(y00 + guv);
|
||||||
dst[bidx] = convert_uchar_sat(y00 + buv);
|
dst[bidx] = convert_uchar_sat(y00 + buv);
|
||||||
#if dcn == 4
|
#if dcn == 4
|
||||||
dst[3] = 255;
|
dst[3] = 255;
|
||||||
#endif
|
#endif
|
||||||
float y01 = max(0.f, ((float) src[yidx + 2]) - 16.f) * coeffs[0];
|
|
||||||
dst[dcn + 2 - bidx] = convert_uchar_sat(y01 + ruv);
|
dst[dcn + 2 - bidx] = convert_uchar_sat(y01 + ruv);
|
||||||
dst[dcn + 1] = convert_uchar_sat(y01 + guv);
|
dst[dcn + 1] = convert_uchar_sat(y01 + guv);
|
||||||
dst[dcn + bidx] = convert_uchar_sat(y01 + buv);
|
dst[dcn + bidx] = convert_uchar_sat(y01 + buv);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user