This commit is contained in:
Ilya Lavrenov
2014-09-22 23:39:34 +04:00
parent b8457be394
commit 00f16e9178
3 changed files with 81 additions and 3 deletions

View File

@@ -107,6 +107,14 @@ copyMask_<uchar>(const uchar* _src, size_t sstep, const uchar* mask, size_t mste
_mm_storeu_si128((__m128i*)(dst + x), rDst);
}
}
#elif CV_NEON
// NEON path: handle 16 one-byte elements per iteration; whatever is left
// (fewer than 16) falls through to the scalar loop after the #endif.
uint8x16_t v_zero = vdupq_n_u8(0);
for( ; x <= size.width - 16; x += 16 )
{
// Unsigned "mask > 0" compare: each lane becomes 0xFF where the mask byte
// is non-zero and 0x00 where it is zero.
uint8x16_t v_mask = vcgtq_u8(vld1q_u8(mask + x), v_zero);
uint8x16_t v_dst = vld1q_u8(dst + x), v_src = vld1q_u8(src + x);
// Bitwise select: take src bits where v_mask is set, keep the current dst
// bits elsewhere, then write the blended vector back to dst.
vst1q_u8(dst + x, vbslq_u8(v_mask, v_src, v_dst));
}
#endif
for( ; x < size.width; x++ )
if( mask[x] )
@@ -143,6 +151,17 @@ copyMask_<ushort>(const uchar* _src, size_t sstep, const uchar* mask, size_t mst
_mm_storeu_si128((__m128i*)(dst + x), rDst);
}
}
#elif CV_NEON
// NEON path: handle 8 16-bit elements per iteration (one mask byte per
// element); whatever is left falls through to the scalar loop after the #endif.
uint8x8_t v_zero = vdup_n_u8(0);
for( ; x <= size.width - 8; x += 8 )
{
// Unsigned "mask > 0" compare: 0xFF per non-zero mask byte, 0x00 otherwise.
uint8x8_t v_mask = vcgt_u8(vld1_u8(mask + x), v_zero);
// Widen the 8-bit mask to 16 bits per element: interleaving the mask with
// itself duplicates every byte (m0,m0,m1,m1,...), so after combining the two
// halves and reinterpreting as u16 each lane is 0xFFFF or 0x0000.
uint8x8x2_t v_mask2 = vzip_u8(v_mask, v_mask);
uint16x8_t v_mask_res = vreinterpretq_u16_u8(vcombine_u8(v_mask2.val[0], v_mask2.val[1]));
uint16x8_t v_src = vld1q_u16(src + x), v_dst = vld1q_u16(dst + x);
// Bitwise select: src where the widened mask is set, existing dst elsewhere.
vst1q_u16(dst + x, vbslq_u16(v_mask_res, v_src, v_dst));
}
#endif
for( ; x < size.width; x++ )
if( mask[x] )