cv::cvtColor (Gray2RGB5x5)

This commit is contained in:
Ilya Lavrenov 2014-10-06 13:33:06 -07:00
parent f91e461ea0
commit eb3046f7d3

View File

@ -686,18 +686,18 @@ template<> struct RGB2RGB<uchar>
for ( ; i <= n - 64; i += 64 )
{
uint8x16x4_t v_src = vld4q_u8(src + i), v_dst;
v_dst.val[0] = v_src.val[0];
v_dst.val[0] = v_src.val[2];
v_dst.val[1] = v_src.val[1];
v_dst.val[2] = v_src.val[2];
v_dst.val[2] = v_src.val[0];
v_dst.val[3] = v_src.val[3];
vst4q_u8(dst + i, v_dst);
}
for ( ; i <= n - 32; i += 32 )
{
uint8x8x4_t v_src = vld4_u8(src + i), v_dst;
v_dst.val[0] = v_src.val[0];
v_dst.val[0] = v_src.val[2];
v_dst.val[1] = v_src.val[1];
v_dst.val[2] = v_src.val[2];
v_dst.val[2] = v_src.val[0];
v_dst.val[3] = v_src.val[3];
vst4_u8(dst + i, v_dst);
}
@ -956,23 +956,57 @@ struct Gray2RGB5x5
{
typedef uchar channel_type;
Gray2RGB5x5(int _greenBits) : greenBits(_greenBits) {}
Gray2RGB5x5(int _greenBits) : greenBits(_greenBits)
{
#if CV_NEON
v_n7 = vdup_n_u8(~7);
v_n3 = vdup_n_u8(~3);
#endif
}
void operator()(const uchar* src, uchar* dst, int n) const
{
int i = 0;
if( greenBits == 6 )
for( int i = 0; i < n; i++ )
{
#if CV_NEON
for ( ; i <= n - 8; i += 8 )
{
uint8x8_t v_src = vld1_u8(src + i);
uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src, 3));
v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src, v_n3)), 3));
v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src, v_n7)), 8));
vst1q_u16((ushort *)dst + i, v_dst);
}
#endif
for ( ; i < n; i++ )
{
int t = src[i];
((ushort*)dst)[i] = (ushort)((t >> 3)|((t & ~3) << 3)|((t & ~7) << 8));
}
}
else
for( int i = 0; i < n; i++ )
{
#if CV_NEON
for ( ; i <= n - 8; i += 8 )
{
uint16x8_t v_src = vmovl_u8(vshr_n_u8(vld1_u8(src + i), 3));
uint16x8_t v_dst = vorrq_u16(vorrq_u16(v_src, vshlq_n_u16(v_src, 5)), vshlq_n_u16(v_src, 10));
vst1q_u16((ushort *)dst + i, v_dst);
}
#endif
for( ; i < n; i++ )
{
int t = src[i] >> 3;
((ushort*)dst)[i] = (ushort)(t|(t << 5)|(t << 10));
}
}
}
int greenBits;
#if CV_NEON
uint8x8_t v_n7, v_n3;
#endif
};
@ -1046,7 +1080,7 @@ struct RGB5x52Gray
uint16x8_t v_src = vld1q_u16((ushort *)src + i);
uint16x8_t v_t0 = vandq_u16(vshlq_n_u16(v_src, 3), v_f8),
v_t1 = vandq_u16(vshrq_n_u16(v_src, 2), v_f8),
v_t2 = vandq_u16(vshrq_n_u16(v_src, 8), v_f8);
v_t2 = vandq_u16(vshrq_n_u16(v_src, 7), v_f8);
uint32x4_t v_dst0 = vmlal_u16(vmlal_u16(vmull_u16(vget_low_u16(v_t0), v_b2y),
vget_low_u16(v_t1), v_g2y), vget_low_u16(v_t2), v_r2y);