Used direct float calculations
This commit is contained in:
parent
5aa9ac9a77
commit
60367907fe
@ -77,7 +77,7 @@ enum
|
||||
{
|
||||
yuv_shift = 14,
|
||||
xyz_shift = 12,
|
||||
hsv_shift = 12,
|
||||
hsv_shift = 12,
|
||||
R2Y = 4899,
|
||||
G2Y = 9617,
|
||||
B2Y = 1868,
|
||||
@ -149,7 +149,7 @@ __kernel void RGB2Gray(__global const uchar * srcptr, int src_step, int src_offs
|
||||
#ifdef DEPTH_5
|
||||
dst[0] = fma(src_pix.B_COMP, 0.114f, fma(src_pix.G_COMP, 0.587f, src_pix.R_COMP * 0.299f));
|
||||
#else
|
||||
dst[0] = (DATA_TYPE)CV_DESCALE(mad24(src_pix.B_COMP, B2Y, mad24(src_pix.G_COMP, G2Y, src_pix.R_COMP * R2Y)), yuv_shift);
|
||||
dst[0] = (DATA_TYPE)CV_DESCALE(mad24(src_pix.B_COMP, B2Y, mad24(src_pix.G_COMP, G2Y, mul24(src_pix.R_COMP, R2Y))), yuv_shift);
|
||||
#endif
|
||||
++y;
|
||||
src_index += src_step;
|
||||
@ -224,13 +224,13 @@ __kernel void RGB2YUV(__global const uchar* srcptr, int src_step, int src_offset
|
||||
|
||||
#ifdef DEPTH_5
|
||||
__constant float * coeffs = c_RGB2YUVCoeffs_f;
|
||||
const DATA_TYPE Y = fma(b, coeffs[0], fma(g, coeffs[1], r * coeffs[2]));
|
||||
const DATA_TYPE Y = fma(b, coeffs[0], fma(g, coeffs[1], r * coeffs[2]));
|
||||
const DATA_TYPE U = fma(b - Y, coeffs[3], HALF_MAX);
|
||||
const DATA_TYPE V = fma(r - Y, coeffs[4], HALF_MAX);
|
||||
#else
|
||||
__constant int * coeffs = c_RGB2YUVCoeffs_i;
|
||||
const int delta = HALF_MAX * (1 << yuv_shift);
|
||||
const int Y = CV_DESCALE(mad24(b, coeffs[0], mad24(g, coeffs[1], r * coeffs[2])), yuv_shift);
|
||||
const int Y = CV_DESCALE(mad24(b, coeffs[0], mad24(g, coeffs[1], mul24(r, coeffs[2]))), yuv_shift);
|
||||
const int U = CV_DESCALE(mad24(b - Y, coeffs[3], delta), yuv_shift);
|
||||
const int V = CV_DESCALE(mad24(r - Y, coeffs[4], delta), yuv_shift);
|
||||
#endif
|
||||
@ -247,8 +247,8 @@ __kernel void RGB2YUV(__global const uchar* srcptr, int src_step, int src_offset
|
||||
}
|
||||
}
|
||||
|
||||
__constant float c_YUV2RGBCoeffs_f[5] = { 2.032f, -0.395f, -0.581f, 1.140f };
|
||||
__constant int c_YUV2RGBCoeffs_i[5] = { 33292, -6472, -9519, 18678 };
|
||||
// YUV -> RGB coefficient tables read by the YUV2RGB kernel (float and integer paths).
// As used by the integer path of that kernel:
//   [0] multiplies (U - HALF_MAX) to form the b term
//   [1] multiplies (U - HALF_MAX) and [2] multiplies (V - HALF_MAX) to form the g term
//   [3] multiplies (V - HALF_MAX) to form the r term
// The float path uses [0] the same way for b. The integer products are passed through
// CV_DESCALE(..., yuv_shift) before being added to Y.
__constant float c_YUV2RGBCoeffs_f[4] = { 2.032f, -0.395f, -0.581f, 1.140f };
|
||||
__constant int c_YUV2RGBCoeffs_i[4] = { 33292, -6472, -9519, 18678 };
|
||||
|
||||
__kernel void YUV2RGB(__global const uchar* srcptr, int src_step, int src_offset,
|
||||
__global uchar* dstptr, int dst_step, int dt_offset,
|
||||
@ -279,9 +279,9 @@ __kernel void YUV2RGB(__global const uchar* srcptr, int src_step, int src_offset
|
||||
float b = fma(U - HALF_MAX, coeffs[0], Y);
|
||||
#else
|
||||
__constant int * coeffs = c_YUV2RGBCoeffs_i;
|
||||
const int r = Y + CV_DESCALE((V - HALF_MAX) * coeffs[3], yuv_shift);
|
||||
const int g = Y + CV_DESCALE(mad24(V - HALF_MAX, coeffs[2], (U - HALF_MAX) * coeffs[1]), yuv_shift);
|
||||
const int b = Y + CV_DESCALE((U - HALF_MAX) * coeffs[0], yuv_shift);
|
||||
const int r = Y + CV_DESCALE(mul24(V - HALF_MAX, coeffs[3]), yuv_shift);
|
||||
const int g = Y + CV_DESCALE(mad24(V - HALF_MAX, coeffs[2], mul24(U - HALF_MAX, coeffs[1])), yuv_shift);
|
||||
const int b = Y + CV_DESCALE(mul24(U - HALF_MAX, coeffs[0]), yuv_shift);
|
||||
#endif
|
||||
|
||||
dst[bidx] = SAT_CAST( b );
|
||||
@ -297,13 +297,8 @@ __kernel void YUV2RGB(__global const uchar* srcptr, int src_step, int src_offset
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__constant int ITUR_BT_601_CY = 1220542;
|
||||
__constant int ITUR_BT_601_CUB = 2116026;
|
||||
__constant int ITUR_BT_601_CUG = -409993;
|
||||
__constant int ITUR_BT_601_CVG = -852492;
|
||||
__constant int ITUR_BT_601_CVR = 1673527;
|
||||
__constant int ITUR_BT_601_SHIFT = 20;
|
||||
// Floating-point YUV -> RGB coefficients shared by the YUV2RGB_NVx,
// YUV2RGB_YV12_IYUV and YUV2RGB_422 kernels (U and V have HALF_MAX subtracted first):
//   [0] scale applied to max(0, Y - 16)
//   [1] multiplies U in the `buv` term
//   [2] multiplies U and [3] multiplies V in the `guv` term
//   [4] multiplies V in the `ruv` term
// Each *uv term also carries a +0.5f bias before convert_uchar_sat is applied.
__constant float c_YUV2RGBCoeffs_420[5] = { 1.163999557f, 2.017999649f, -0.390999794f,
|
||||
-0.812999725f, 1.5959997177f };
|
||||
|
||||
__kernel void YUV2RGB_NVx(__global const uchar* srcptr, int src_step, int src_offset,
|
||||
__global uchar* dstptr, int dst_step, int dt_offset,
|
||||
@ -324,46 +319,47 @@ __kernel void YUV2RGB_NVx(__global const uchar* srcptr, int src_step, int src_of
|
||||
__global uchar* dst1 = dstptr + mad24(y << 1, dst_step, mad24(x, dcn<<1, dt_offset));
|
||||
__global uchar* dst2 = dst1 + dst_step;
|
||||
|
||||
int Y1 = ysrc[0];
|
||||
int Y2 = ysrc[1];
|
||||
int Y3 = ysrc[src_step];
|
||||
int Y4 = ysrc[src_step + 1];
|
||||
float Y1 = ysrc[0];
|
||||
float Y2 = ysrc[1];
|
||||
float Y3 = ysrc[src_step];
|
||||
float Y4 = ysrc[src_step + 1];
|
||||
|
||||
int U = ((int)usrc[uidx]) - HALF_MAX;
|
||||
int V = ((int)usrc[1-uidx]) - HALF_MAX;
|
||||
float U = ((float)usrc[uidx]) - HALF_MAX;
|
||||
float V = ((float)usrc[1-uidx]) - HALF_MAX;
|
||||
|
||||
int ruv = mad24(ITUR_BT_601_CVR, V, (1 << (ITUR_BT_601_SHIFT - 1)));
|
||||
int guv = mad24(ITUR_BT_601_CVG, V, mad24(ITUR_BT_601_CUG, U, (1 << (ITUR_BT_601_SHIFT - 1))));
|
||||
int buv = mad24(ITUR_BT_601_CUB, U, (1 << (ITUR_BT_601_SHIFT - 1)));
|
||||
__constant float* coeffs = c_YUV2RGBCoeffs_420;
|
||||
float ruv = fma(coeffs[4], V, 0.5f);
|
||||
float guv = fma(coeffs[3], V, fma(coeffs[2], U, 0.5f));
|
||||
float buv = fma(coeffs[1], U, 0.5f);
|
||||
|
||||
Y1 = mul24(max(0, Y1 - 16), ITUR_BT_601_CY);
|
||||
dst1[2 - bidx] = convert_uchar_sat((Y1 + ruv) >> ITUR_BT_601_SHIFT);
|
||||
dst1[1] = convert_uchar_sat((Y1 + guv) >> ITUR_BT_601_SHIFT);
|
||||
dst1[bidx] = convert_uchar_sat((Y1 + buv) >> ITUR_BT_601_SHIFT);
|
||||
Y1 = max(0.f, Y1 - 16.f) * coeffs[0];
|
||||
dst1[2 - bidx] = convert_uchar_sat(Y1 + ruv);
|
||||
dst1[1] = convert_uchar_sat(Y1 + guv);
|
||||
dst1[bidx] = convert_uchar_sat(Y1 + buv);
|
||||
#if dcn == 4
|
||||
dst1[3] = 255;
|
||||
#endif
|
||||
|
||||
Y2 = mul24(max(0, Y2 - 16), ITUR_BT_601_CY);
|
||||
dst1[dcn + 2 - bidx] = convert_uchar_sat((Y2 + ruv) >> ITUR_BT_601_SHIFT);
|
||||
dst1[dcn + 1] = convert_uchar_sat((Y2 + guv) >> ITUR_BT_601_SHIFT);
|
||||
dst1[dcn + bidx] = convert_uchar_sat((Y2 + buv) >> ITUR_BT_601_SHIFT);
|
||||
Y2 = max(0.f, Y2 - 16.f) * coeffs[0];
|
||||
dst1[dcn + 2 - bidx] = convert_uchar_sat(Y2 + ruv);
|
||||
dst1[dcn + 1] = convert_uchar_sat(Y2 + guv);
|
||||
dst1[dcn + bidx] = convert_uchar_sat(Y2 + buv);
|
||||
#if dcn == 4
|
||||
dst1[7] = 255;
|
||||
#endif
|
||||
|
||||
Y3 = mul24(max(0, Y3 - 16), ITUR_BT_601_CY);
|
||||
dst2[2 - bidx] = convert_uchar_sat((Y3 + ruv) >> ITUR_BT_601_SHIFT);
|
||||
dst2[1] = convert_uchar_sat((Y3 + guv) >> ITUR_BT_601_SHIFT);
|
||||
dst2[bidx] = convert_uchar_sat((Y3 + buv) >> ITUR_BT_601_SHIFT);
|
||||
Y3 = max(0.f, Y3 - 16.f) * coeffs[0];
|
||||
dst2[2 - bidx] = convert_uchar_sat(Y3 + ruv);
|
||||
dst2[1] = convert_uchar_sat(Y3 + guv);
|
||||
dst2[bidx] = convert_uchar_sat(Y3 + buv);
|
||||
#if dcn == 4
|
||||
dst2[3] = 255;
|
||||
#endif
|
||||
|
||||
Y4 = mul24(max(0, Y4 - 16), ITUR_BT_601_CY);
|
||||
dst2[dcn + 2 - bidx] = convert_uchar_sat((Y4 + ruv) >> ITUR_BT_601_SHIFT);
|
||||
dst2[dcn + 1] = convert_uchar_sat((Y4 + guv) >> ITUR_BT_601_SHIFT);
|
||||
dst2[dcn + bidx] = convert_uchar_sat((Y4 + buv) >> ITUR_BT_601_SHIFT);
|
||||
Y4 = max(0.f, Y4 - 16.f) * coeffs[0];
|
||||
dst2[dcn + 2 - bidx] = convert_uchar_sat(Y4 + ruv);
|
||||
dst2[dcn + 1] = convert_uchar_sat(Y4 + guv);
|
||||
dst2[dcn + bidx] = convert_uchar_sat(Y4 + buv);
|
||||
#if dcn == 4
|
||||
dst2[7] = 255;
|
||||
#endif
|
||||
@ -391,56 +387,57 @@ __kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int
|
||||
__global uchar* dst1 = dstptr + mad24(y << 1, dst_step, x * (dcn<<1) + dt_offset);
|
||||
__global uchar* dst2 = dst1 + dst_step;
|
||||
|
||||
int Y1 = ysrc[0];
|
||||
int Y2 = ysrc[1];
|
||||
int Y3 = ysrc[src_step];
|
||||
int Y4 = ysrc[src_step + 1];
|
||||
float Y1 = ysrc[0];
|
||||
float Y2 = ysrc[1];
|
||||
float Y3 = ysrc[src_step];
|
||||
float Y4 = ysrc[src_step + 1];
|
||||
|
||||
#ifdef SRC_CONT
|
||||
__global const uchar* uvsrc = srcptr + mad24(rows, src_step, src_offset);
|
||||
int u_ind = mad24(y, cols >> 1, x);
|
||||
int uv[2] = { ((int)uvsrc[u_ind]) - HALF_MAX, ((int)uvsrc[u_ind + ((rows * cols) >> 2)]) - HALF_MAX };
|
||||
float uv[2] = { ((float)uvsrc[u_ind]) - HALF_MAX, ((float)uvsrc[u_ind + ((rows * cols) >> 2)]) - HALF_MAX };
|
||||
#else
|
||||
int vsteps[2] = { cols >> 1, src_step - (cols >> 1)};
|
||||
__global const uchar* usrc = srcptr + mad24(rows + (y>>1), src_step, src_offset + (y%2)*(cols >> 1) + x);
|
||||
__global const uchar* vsrc = usrc + mad24(rows >> 2, src_step, rows % 4 ? vsteps[y%2] : 0);
|
||||
int uv[2] = { ((int)usrc[0]) - HALF_MAX, ((int)vsrc[0]) - HALF_MAX };
|
||||
float uv[2] = { ((float)usrc[0]) - HALF_MAX, ((float)vsrc[0]) - HALF_MAX };
|
||||
#endif
|
||||
int U = uv[uidx];
|
||||
int V = uv[1-uidx];
|
||||
float U = uv[uidx];
|
||||
float V = uv[1-uidx];
|
||||
|
||||
int ruv = mad24(ITUR_BT_601_CVR, V, (1 << (ITUR_BT_601_SHIFT - 1)));
|
||||
int guv = mad24(ITUR_BT_601_CVG, V, mad24(ITUR_BT_601_CUG, U, (1 << (ITUR_BT_601_SHIFT - 1))));
|
||||
int buv = mad24(ITUR_BT_601_CUB, U, (1 << (ITUR_BT_601_SHIFT - 1)));
|
||||
__constant float* coeffs = c_YUV2RGBCoeffs_420;
|
||||
float ruv = fma(coeffs[4], V, 0.5f);
|
||||
float guv = fma(coeffs[3], V, fma(coeffs[2], U, 0.5f));
|
||||
float buv = fma(coeffs[1], U, 0.5f);
|
||||
|
||||
Y1 = mul24(max(0, Y1 - 16), ITUR_BT_601_CY);
|
||||
dst1[2 - bidx] = convert_uchar_sat((Y1 + ruv) >> ITUR_BT_601_SHIFT);
|
||||
dst1[1] = convert_uchar_sat((Y1 + guv) >> ITUR_BT_601_SHIFT);
|
||||
dst1[bidx] = convert_uchar_sat((Y1 + buv) >> ITUR_BT_601_SHIFT);
|
||||
Y1 = max(0.f, Y1 - 16.f) * coeffs[0];
|
||||
dst1[2 - bidx] = convert_uchar_sat(Y1 + ruv);
|
||||
dst1[1] = convert_uchar_sat(Y1 + guv);
|
||||
dst1[bidx] = convert_uchar_sat(Y1 + buv);
|
||||
#if dcn == 4
|
||||
dst1[3] = 255;
|
||||
#endif
|
||||
|
||||
Y2 = mul24(max(0, Y2 - 16), ITUR_BT_601_CY);
|
||||
dst1[dcn + 2 - bidx] = convert_uchar_sat((Y2 + ruv) >> ITUR_BT_601_SHIFT);
|
||||
dst1[dcn + 1] = convert_uchar_sat((Y2 + guv) >> ITUR_BT_601_SHIFT);
|
||||
dst1[dcn + bidx] = convert_uchar_sat((Y2 + buv) >> ITUR_BT_601_SHIFT);
|
||||
Y2 = max(0.f, Y2 - 16.f) * coeffs[0];
|
||||
dst1[dcn + 2 - bidx] = convert_uchar_sat(Y2 + ruv);
|
||||
dst1[dcn + 1] = convert_uchar_sat(Y2 + guv);
|
||||
dst1[dcn + bidx] = convert_uchar_sat(Y2 + buv);
|
||||
#if dcn == 4
|
||||
dst1[7] = 255;
|
||||
#endif
|
||||
|
||||
Y3 = mul24(max(0, Y3 - 16), ITUR_BT_601_CY);
|
||||
dst2[2 - bidx] = convert_uchar_sat((Y3 + ruv) >> ITUR_BT_601_SHIFT);
|
||||
dst2[1] = convert_uchar_sat((Y3 + guv) >> ITUR_BT_601_SHIFT);
|
||||
dst2[bidx] = convert_uchar_sat((Y3 + buv) >> ITUR_BT_601_SHIFT);
|
||||
Y3 = max(0.f, Y3 - 16.f) * coeffs[0];
|
||||
dst2[2 - bidx] = convert_uchar_sat(Y3 + ruv);
|
||||
dst2[1] = convert_uchar_sat(Y3 + guv);
|
||||
dst2[bidx] = convert_uchar_sat(Y3 + buv);
|
||||
#if dcn == 4
|
||||
dst2[3] = 255;
|
||||
#endif
|
||||
|
||||
Y4 = mul24(max(0, Y4 - 16), ITUR_BT_601_CY);
|
||||
dst2[dcn + 2 - bidx] = convert_uchar_sat((Y4 + ruv) >> ITUR_BT_601_SHIFT);
|
||||
dst2[dcn + 1] = convert_uchar_sat((Y4 + guv) >> ITUR_BT_601_SHIFT);
|
||||
dst2[dcn + bidx] = convert_uchar_sat((Y4 + buv) >> ITUR_BT_601_SHIFT);
|
||||
Y4 = max(0.f, Y4 - 16.f) * coeffs[0];
|
||||
dst2[dcn + 2 - bidx] = convert_uchar_sat(Y4 + ruv);
|
||||
dst2[dcn + 1] = convert_uchar_sat(Y4 + guv);
|
||||
dst2[dcn + bidx] = convert_uchar_sat(Y4 + buv);
|
||||
#if dcn == 4
|
||||
dst2[7] = 255;
|
||||
#endif
|
||||
@ -450,16 +447,8 @@ __kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int
|
||||
}
|
||||
}
|
||||
|
||||
__constant int ITUR_BT_601_CRY = 269484;
|
||||
__constant int ITUR_BT_601_CGY = 528482;
|
||||
__constant int ITUR_BT_601_CBY = 102760;
|
||||
__constant int ITUR_BT_601_CRU = -155188;
|
||||
__constant int ITUR_BT_601_CGU = -305135;
|
||||
__constant int ITUR_BT_601_CBU = 460324;
|
||||
__constant int ITUR_BT_601_CGV = -385875;
|
||||
__constant int ITUR_BT_601_CBV = -74448;
|
||||
__constant int YSHIFT = 17301504;
|
||||
__constant int UVSHIFT = 134742016;
|
||||
// Floating-point RGB -> YUV coefficients used by the RGB2YUV_YV12_IYUV kernel:
//   [0], [1], [2]  R, G, B weights for the luma outputs (added to a 16.5f offset)
//   [3], [4], [5]  R, G, B weights for uv[0] (added to a 128.5f offset)
//   [5], [6], [7]  R, G, B weights for uv[1] (added to a 128.5f offset)
// Note that [5] is deliberately shared: it is the B weight of uv[0] and the R weight of uv[1].
// The kernel then writes uv[uidx] to the U plane and uv[1-uidx] to the V plane.
__constant float c_RGB2YUVCoeffs_420[8] = { 0.256999969f, 0.50399971f, 0.09799957f, -0.1479988098f, -0.2909994125f,
|
||||
0.438999176f, -0.3679990768f, -0.0709991455f };
|
||||
|
||||
__kernel void RGB2YUV_YV12_IYUV(__global const uchar* srcptr, int src_step, int src_offset,
|
||||
__global uchar* dstptr, int dst_step, int dst_offset,
|
||||
@ -488,26 +477,22 @@ __kernel void RGB2YUV_YV12_IYUV(__global const uchar* srcptr, int src_step, int
|
||||
__global uchar* udst = dstptr + mad24(y_rows + (y>>1), dst_step, dst_offset + (y%2)*(cols >> 1) + x);
|
||||
__global uchar* vdst = udst + mad24(y_rows >> 2, dst_step, y_rows % 4 ? vsteps[y%2] : 0);
|
||||
|
||||
int4 src_pix1 = convert_int4(vload4(0, src1));
|
||||
int4 src_pix2 = convert_int4(vload4(0, src1+scn));
|
||||
int4 src_pix3 = convert_int4(vload4(0, src2));
|
||||
int4 src_pix4 = convert_int4(vload4(0, src2+scn));
|
||||
float4 src_pix1 = convert_float4(vload4(0, src1));
|
||||
float4 src_pix2 = convert_float4(vload4(0, src1+scn));
|
||||
float4 src_pix3 = convert_float4(vload4(0, src2));
|
||||
float4 src_pix4 = convert_float4(vload4(0, src2+scn));
|
||||
|
||||
int y00 = mad24(ITUR_BT_601_CRY, src_pix1.R_COMP, mad24(ITUR_BT_601_CGY, src_pix1.G_COMP, mad24(ITUR_BT_601_CBY, src_pix1.B_COMP, YSHIFT)));
|
||||
int y01 = mad24(ITUR_BT_601_CRY, src_pix2.R_COMP, mad24(ITUR_BT_601_CGY, src_pix2.G_COMP, mad24(ITUR_BT_601_CBY, src_pix2.B_COMP, YSHIFT)));
|
||||
int y10 = mad24(ITUR_BT_601_CRY, src_pix3.R_COMP, mad24(ITUR_BT_601_CGY, src_pix3.G_COMP, mad24(ITUR_BT_601_CBY, src_pix3.B_COMP, YSHIFT)));
|
||||
int y11 = mad24(ITUR_BT_601_CRY, src_pix4.R_COMP, mad24(ITUR_BT_601_CGY, src_pix4.G_COMP, mad24(ITUR_BT_601_CBY, src_pix4.B_COMP, YSHIFT)));
|
||||
__constant float* coeffs = c_RGB2YUVCoeffs_420;
|
||||
ydst1[0] = convert_uchar_sat(fma(coeffs[0], src_pix1.R_COMP, fma(coeffs[1], src_pix1.G_COMP, fma(coeffs[2], src_pix1.B_COMP, 16.5f))));
|
||||
ydst1[1] = convert_uchar_sat(fma(coeffs[0], src_pix2.R_COMP, fma(coeffs[1], src_pix2.G_COMP, fma(coeffs[2], src_pix2.B_COMP, 16.5f))));
|
||||
ydst2[0] = convert_uchar_sat(fma(coeffs[0], src_pix3.R_COMP, fma(coeffs[1], src_pix3.G_COMP, fma(coeffs[2], src_pix3.B_COMP, 16.5f))));
|
||||
ydst2[1] = convert_uchar_sat(fma(coeffs[0], src_pix4.R_COMP, fma(coeffs[1], src_pix4.G_COMP, fma(coeffs[2], src_pix4.B_COMP, 16.5f))));
|
||||
|
||||
ydst1[0] = convert_uchar_sat(y00 >> ITUR_BT_601_SHIFT);
|
||||
ydst1[1] = convert_uchar_sat(y01 >> ITUR_BT_601_SHIFT);
|
||||
ydst2[0] = convert_uchar_sat(y10 >> ITUR_BT_601_SHIFT);
|
||||
ydst2[1] = convert_uchar_sat(y11 >> ITUR_BT_601_SHIFT);
|
||||
float uv[2] = { fma(coeffs[3], src_pix1.R_COMP, fma(coeffs[4], src_pix1.G_COMP, fma(coeffs[5], src_pix1.B_COMP, 128.5f))),
|
||||
fma(coeffs[5], src_pix1.R_COMP, fma(coeffs[6], src_pix1.G_COMP, fma(coeffs[7], src_pix1.B_COMP, 128.5f))) };
|
||||
|
||||
int uv[2] = { mad24(ITUR_BT_601_CRU, src_pix1.R_COMP, mad24(ITUR_BT_601_CGU, src_pix1.G_COMP, mad24(ITUR_BT_601_CBU, src_pix1.B_COMP, UVSHIFT))),
|
||||
mad24(ITUR_BT_601_CBU, src_pix1.R_COMP, mad24(ITUR_BT_601_CGV, src_pix1.G_COMP, mad24(ITUR_BT_601_CBV, src_pix1.B_COMP, UVSHIFT))) };
|
||||
|
||||
udst[0] = convert_uchar_sat(uv[uidx] >> ITUR_BT_601_SHIFT);
|
||||
vdst[0] = convert_uchar_sat(uv[1-uidx] >> ITUR_BT_601_SHIFT);
|
||||
udst[0] = convert_uchar_sat(uv[uidx] );
|
||||
vdst[0] = convert_uchar_sat(uv[1-uidx]);
|
||||
|
||||
++y;
|
||||
src_index += 2*src_step;
|
||||
@ -534,25 +519,27 @@ __kernel void YUV2RGB_422(__global const uchar* srcptr, int src_step, int src_of
|
||||
{
|
||||
if (y < rows )
|
||||
{
|
||||
int U = ((int) src[uidx]) - HALF_MAX;
|
||||
int V = ((int) src[(2 + uidx) % 4]) - HALF_MAX;
|
||||
float U = ((float) src[uidx]) - HALF_MAX;
|
||||
float V = ((float) src[(2 + uidx) % 4]) - HALF_MAX;
|
||||
|
||||
int ruv = mad24(ITUR_BT_601_CVR, V, (1 << (ITUR_BT_601_SHIFT - 1)));
|
||||
int guv = mad24(ITUR_BT_601_CVG, V, mad24(ITUR_BT_601_CUG, U, (1 << (ITUR_BT_601_SHIFT - 1))));
|
||||
int buv = mad24(ITUR_BT_601_CUB, U, (1 << (ITUR_BT_601_SHIFT - 1)));
|
||||
|
||||
int y00 = max(0, ((int) src[yidx]) - 16) * ITUR_BT_601_CY;
|
||||
dst[2 - bidx] = convert_uchar_sat((y00 + ruv) >> ITUR_BT_601_SHIFT);
|
||||
dst[1] = convert_uchar_sat((y00 + guv) >> ITUR_BT_601_SHIFT);
|
||||
dst[bidx] = convert_uchar_sat((y00 + buv) >> ITUR_BT_601_SHIFT);
|
||||
__constant float* coeffs = c_YUV2RGBCoeffs_420;
|
||||
float ruv = fma(coeffs[4], V, 0.5f);
|
||||
float guv = fma(coeffs[3], V, fma(coeffs[2], U, 0.5f));
|
||||
float buv = fma(coeffs[1], U, 0.5f);
|
||||
|
||||
float y00 = max(0.f, ((float) src[yidx]) - 16.f) * coeffs[0];
|
||||
dst[2 - bidx] = convert_uchar_sat(y00 + ruv);
|
||||
dst[1] = convert_uchar_sat(y00 + guv);
|
||||
dst[bidx] = convert_uchar_sat(y00 + buv);
|
||||
#if dcn == 4
|
||||
dst[3] = 255;
|
||||
#endif
|
||||
|
||||
int y01 = max(0, ((int) src[yidx + 2]) - 16) * ITUR_BT_601_CY;
|
||||
dst[dcn + 2 - bidx] = convert_uchar_sat((y01 + ruv) >> ITUR_BT_601_SHIFT);
|
||||
dst[dcn + 1] = convert_uchar_sat((y01 + guv) >> ITUR_BT_601_SHIFT);
|
||||
dst[dcn + bidx] = convert_uchar_sat((y01 + buv) >> ITUR_BT_601_SHIFT);
|
||||
float y01 = max(0.f, ((float) src[yidx + 2]) - 16.f) * coeffs[0];
|
||||
dst[dcn + 2 - bidx] = convert_uchar_sat(y01 + ruv);
|
||||
dst[dcn + 1] = convert_uchar_sat(y01 + guv);
|
||||
dst[dcn + bidx] = convert_uchar_sat(y01 + buv);
|
||||
#if dcn == 4
|
||||
dst[7] = 255;
|
||||
#endif
|
||||
@ -599,7 +586,7 @@ __kernel void RGB2YCrCb(__global const uchar* srcptr, int src_step, int src_offs
|
||||
#else
|
||||
__constant int * coeffs = c_RGB2YCrCbCoeffs_i;
|
||||
int delta = HALF_MAX * (1 << yuv_shift);
|
||||
int Y = CV_DESCALE(mad24(b, coeffs[2], mad24(g, coeffs[1], r * coeffs[0])), yuv_shift);
|
||||
int Y = CV_DESCALE(mad24(b, coeffs[2], mad24(g, coeffs[1], mul24(r, coeffs[0]))), yuv_shift);
|
||||
int Cr = CV_DESCALE(mad24(r - Y, coeffs[3], delta), yuv_shift);
|
||||
int Cb = CV_DESCALE(mad24(b - Y, coeffs[4], delta), yuv_shift);
|
||||
#endif
|
||||
|
@ -387,14 +387,14 @@ struct CvtColor_RGB2YUV_420 :
|
||||
}
|
||||
};
|
||||
|
||||
OCL_TEST_P(CvtColor_RGB2YUV_420, RGBA2YUV_YV12) { performTest(4, 1, CVTCODE(RGBA2YUV_YV12)); }
|
||||
OCL_TEST_P(CvtColor_RGB2YUV_420, BGRA2YUV_YV12) { performTest(4, 1, CVTCODE(BGRA2YUV_YV12)); }
|
||||
OCL_TEST_P(CvtColor_RGB2YUV_420, RGB2YUV_YV12) { performTest(3, 1, CVTCODE(RGB2YUV_YV12)); }
|
||||
OCL_TEST_P(CvtColor_RGB2YUV_420, BGR2YUV_YV12) { performTest(3, 1, CVTCODE(BGR2YUV_YV12)); }
|
||||
OCL_TEST_P(CvtColor_RGB2YUV_420, RGBA2YUV_IYUV) { performTest(4, 1, CVTCODE(RGBA2YUV_IYUV)); }
|
||||
OCL_TEST_P(CvtColor_RGB2YUV_420, BGRA2YUV_IYUV) { performTest(4, 1, CVTCODE(BGRA2YUV_IYUV)); }
|
||||
OCL_TEST_P(CvtColor_RGB2YUV_420, RGB2YUV_IYUV) { performTest(3, 1, CVTCODE(RGB2YUV_IYUV)); }
|
||||
OCL_TEST_P(CvtColor_RGB2YUV_420, BGR2YUV_IYUV) { performTest(3, 1, CVTCODE(BGR2YUV_IYUV)); }
|
||||
OCL_TEST_P(CvtColor_RGB2YUV_420, RGBA2YUV_YV12) { performTest(4, 1, CVTCODE(RGBA2YUV_YV12), 1); }
|
||||
OCL_TEST_P(CvtColor_RGB2YUV_420, BGRA2YUV_YV12) { performTest(4, 1, CVTCODE(BGRA2YUV_YV12), 1); }
|
||||
OCL_TEST_P(CvtColor_RGB2YUV_420, RGB2YUV_YV12) { performTest(3, 1, CVTCODE(RGB2YUV_YV12), 1); }
|
||||
OCL_TEST_P(CvtColor_RGB2YUV_420, BGR2YUV_YV12) { performTest(3, 1, CVTCODE(BGR2YUV_YV12), 1); }
|
||||
OCL_TEST_P(CvtColor_RGB2YUV_420, RGBA2YUV_IYUV) { performTest(4, 1, CVTCODE(RGBA2YUV_IYUV), 1); }
|
||||
OCL_TEST_P(CvtColor_RGB2YUV_420, BGRA2YUV_IYUV) { performTest(4, 1, CVTCODE(BGRA2YUV_IYUV), 1); }
|
||||
OCL_TEST_P(CvtColor_RGB2YUV_420, RGB2YUV_IYUV) { performTest(3, 1, CVTCODE(RGB2YUV_IYUV), 1); }
|
||||
OCL_TEST_P(CvtColor_RGB2YUV_420, BGR2YUV_IYUV) { performTest(3, 1, CVTCODE(BGR2YUV_IYUV), 1); }
|
||||
|
||||
// YUV422 -> RGBA
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user