v_extract universal intrinsic
This commit is contained in:
parent
11c3fa527a
commit
6a6ccf6032
@ -566,6 +566,7 @@ inline v_reg<_Tp, n> v_combine_low(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>&
|
||||
c.s[i] = a.s[i];
|
||||
c.s[i+(n/2)] = b.s[i];
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
template<typename _Tp, int n>
|
||||
@ -577,6 +578,7 @@ inline v_reg<_Tp, n> v_combine_high(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>&
|
||||
c.s[i] = a.s[i+(n/2)];
|
||||
c.s[i+(n/2)] = b.s[i+(n/2)];
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
template<typename _Tp, int n>
|
||||
@ -592,6 +594,18 @@ inline void v_recombine(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
|
||||
}
|
||||
}
|
||||
|
||||
template<int s, typename _Tp, int n>
|
||||
inline v_reg<_Tp, n> v_extract(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
|
||||
{
|
||||
v_reg<_Tp, n> r;
|
||||
int i = 0;
|
||||
for (; i < s; ++i)
|
||||
r.s[i] = a.s[i+n-s];
|
||||
for (; i < n; ++i)
|
||||
r.s[i] = b.s[i-s];
|
||||
return r;
|
||||
}
|
||||
|
||||
template<int n> inline v_reg<int, n> v_round(const v_reg<float, n>& a)
|
||||
{
|
||||
v_reg<int, n> c;
|
||||
|
@ -557,6 +557,8 @@ OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint16x8, ushort, u16)
|
||||
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int16x8, short, s16)
|
||||
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint32x4, unsigned, u32)
|
||||
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int32x4, int, s32)
|
||||
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint64x2, uint64, u64)
|
||||
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int64x2, int64, s64)
|
||||
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float32x4, float, f32)
|
||||
|
||||
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, scalartype, func, scalar_func) \
|
||||
@ -720,6 +722,23 @@ OPENCV_HAL_IMPL_NEON_UNPACKS(uint32x4, u32)
|
||||
OPENCV_HAL_IMPL_NEON_UNPACKS(int32x4, s32)
|
||||
OPENCV_HAL_IMPL_NEON_UNPACKS(float32x4, f32)
|
||||
|
||||
// Generates v_extract<s>(a, b) for the wrapper type v_<_Tpvec> by forwarding
// both registers and the compile-time index s to vextq_<suffix>.
// Per the Arm VEXT definition the result holds lanes s.. of a followed by
// the first s lanes of b.
// _Tpvec##_t names the raw NEON register type returned by vextq_<suffix>.
#define OPENCV_HAL_IMPL_NEON_EXTRACT(_Tpvec, suffix) \
template <int s> \
inline v_##_Tpvec v_extract(const v_##_Tpvec& a, const v_##_Tpvec& b) \
{ \
    const _Tpvec##_t extracted = vextq_##suffix(a.val, b.val, s); \
    return v_##_Tpvec(extracted); \
}
|
||||
|
||||
// Instantiate v_extract for each NEON vector wrapper type; the second macro
// argument selects the matching vextq_<suffix> intrinsic.
OPENCV_HAL_IMPL_NEON_EXTRACT(uint8x16, u8)
|
||||
OPENCV_HAL_IMPL_NEON_EXTRACT(int8x16, s8)
|
||||
OPENCV_HAL_IMPL_NEON_EXTRACT(uint16x8, u16)
|
||||
OPENCV_HAL_IMPL_NEON_EXTRACT(int16x8, s16)
|
||||
OPENCV_HAL_IMPL_NEON_EXTRACT(uint32x4, u32)
|
||||
OPENCV_HAL_IMPL_NEON_EXTRACT(int32x4, s32)
|
||||
OPENCV_HAL_IMPL_NEON_EXTRACT(uint64x2, u64)
|
||||
OPENCV_HAL_IMPL_NEON_EXTRACT(int64x2, s64)
|
||||
OPENCV_HAL_IMPL_NEON_EXTRACT(float32x4, f32)
|
||||
|
||||
inline v_int32x4 v_round(const v_float32x4& a)
|
||||
{
|
||||
static const int32x4_t v_sign = vdupq_n_s32(1 << 31),
|
||||
|
@ -1149,6 +1149,17 @@ OPENCV_HAL_IMPL_SSE_UNPACKS(v_int32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
|
||||
OPENCV_HAL_IMPL_SSE_UNPACKS(v_float32x4, ps, _mm_castps_si128, _mm_castsi128_ps)
|
||||
OPENCV_HAL_IMPL_SSE_UNPACKS(v_float64x2, pd, _mm_castpd_si128, _mm_castsi128_pd)
|
||||
|
||||
// Bit-cast helpers private to v_extract: view any 128-bit SSE register as
// __m128i so the integer byte-shift intrinsics can be applied to it.
inline __m128i v_sse_extract_to_si128(const __m128i& v) { return v; }
inline __m128i v_sse_extract_to_si128(const __m128& v) { return _mm_castps_si128(v); }
inline __m128i v_sse_extract_to_si128(const __m128d& v) { return _mm_castpd_si128(v); }

// Inverse of the helpers above: reinterpret integer bits as the register
// type of the second (tag) argument, whose value is ignored.
inline __m128i v_sse_extract_from_si128(const __m128i& bits, const __m128i&) { return bits; }
inline __m128 v_sse_extract_from_si128(const __m128i& bits, const __m128&) { return _mm_castsi128_ps(bits); }
inline __m128d v_sse_extract_from_si128(const __m128i& bits, const __m128d&) { return _mm_castsi128_pd(bits); }

/** @brief Extract a window of lanes from the concatenation of two vectors.

Returns { a[s], ..., a[n-1], b[0], ..., b[s-1] } where n = _Tpvec::nlanes.
a is shifted right by s lanes, b is shifted left by (n - s) lanes, and the
two zero-filled halves are OR-ed together.

The shifts operate on __m128i, so float/double registers are bit-cast to
integer form first and cast back afterwards. Previously a.val was passed to
the integer intrinsics directly, which only compiles when val is __m128i.

@tparam s       Index of the first lane taken from a; expected 0 <= s <= n.
@tparam _Tpvec  Vector wrapper exposing lane_type, nlanes and val.
                NOTE(review): assumes _Tpvec is constructible from the type
                of its val member - confirm for the floating-point wrappers.
@param a        Vector providing the leading lanes of the result.
@param b        Vector providing the trailing lanes of the result.
*/
template<int s, typename _Tpvec>
inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
{
    const int w = sizeof(typename _Tpvec::lane_type); // bytes per lane
    const int n = _Tpvec::nlanes;
    // Both shift counts are compile-time constants, as the intrinsics require.
    const __m128i ra = _mm_srli_si128(v_sse_extract_to_si128(a.val), s*w);
    const __m128i rb = _mm_slli_si128(v_sse_extract_to_si128(b.val), (n-s)*w);
    return _Tpvec(v_sse_extract_from_si128(_mm_or_si128(ra, rb), a.val));
}
|
||||
|
||||
// Converts the four float lanes of a to 32-bit integers with
// _mm_cvtps_epi32 and wraps the result in a v_int32x4.
inline v_int32x4 v_round(const v_float32x4& a)
{
    const __m128i rounded = _mm_cvtps_epi32(a.val);
    return v_int32x4(rounded);
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user