v_extract universal intrinsic
This commit is contained in:
parent
11c3fa527a
commit
6a6ccf6032
@ -566,6 +566,7 @@ inline v_reg<_Tp, n> v_combine_low(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>&
|
|||||||
c.s[i] = a.s[i];
|
c.s[i] = a.s[i];
|
||||||
c.s[i+(n/2)] = b.s[i];
|
c.s[i+(n/2)] = b.s[i];
|
||||||
}
|
}
|
||||||
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename _Tp, int n>
|
template<typename _Tp, int n>
|
||||||
@ -577,6 +578,7 @@ inline v_reg<_Tp, n> v_combine_high(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>&
|
|||||||
c.s[i] = a.s[i+(n/2)];
|
c.s[i] = a.s[i+(n/2)];
|
||||||
c.s[i+(n/2)] = b.s[i+(n/2)];
|
c.s[i+(n/2)] = b.s[i+(n/2)];
|
||||||
}
|
}
|
||||||
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename _Tp, int n>
|
template<typename _Tp, int n>
|
||||||
@ -592,6 +594,18 @@ inline void v_recombine(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<int s, typename _Tp, int n>
|
||||||
|
inline v_reg<_Tp, n> v_extract(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
|
||||||
|
{
|
||||||
|
v_reg<_Tp, n> r;
|
||||||
|
int i = 0;
|
||||||
|
for (; i < s; ++i)
|
||||||
|
r.s[i] = a.s[i+n-s];
|
||||||
|
for (; i < n; ++i)
|
||||||
|
r.s[i] = b.s[i-s];
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
template<int n> inline v_reg<int, n> v_round(const v_reg<float, n>& a)
|
template<int n> inline v_reg<int, n> v_round(const v_reg<float, n>& a)
|
||||||
{
|
{
|
||||||
v_reg<int, n> c;
|
v_reg<int, n> c;
|
||||||
|
@ -557,6 +557,8 @@ OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint16x8, ushort, u16)
|
|||||||
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int16x8, short, s16)
|
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int16x8, short, s16)
|
||||||
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint32x4, unsigned, u32)
|
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint32x4, unsigned, u32)
|
||||||
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int32x4, int, s32)
|
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int32x4, int, s32)
|
||||||
|
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint64x2, uint64, u64)
|
||||||
|
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int64x2, int64, s64)
|
||||||
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float32x4, float, f32)
|
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float32x4, float, f32)
|
||||||
|
|
||||||
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, scalartype, func, scalar_func) \
|
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, scalartype, func, scalar_func) \
|
||||||
@ -720,6 +722,23 @@ OPENCV_HAL_IMPL_NEON_UNPACKS(uint32x4, u32)
|
|||||||
OPENCV_HAL_IMPL_NEON_UNPACKS(int32x4, s32)
|
OPENCV_HAL_IMPL_NEON_UNPACKS(int32x4, s32)
|
||||||
OPENCV_HAL_IMPL_NEON_UNPACKS(float32x4, f32)
|
OPENCV_HAL_IMPL_NEON_UNPACKS(float32x4, f32)
|
||||||
|
|
||||||
|
#define OPENCV_HAL_IMPL_NEON_EXTRACT(_Tpvec, suffix) \
|
||||||
|
template <int s> \
|
||||||
|
inline v_##_Tpvec v_extract(const v_##_Tpvec& a, const v_##_Tpvec& b) \
|
||||||
|
{ \
|
||||||
|
return v_##_Tpvec(vextq_##suffix(a.val, b.val, s)); \
|
||||||
|
}
|
||||||
|
|
||||||
|
OPENCV_HAL_IMPL_NEON_EXTRACT(uint8x16, u8)
|
||||||
|
OPENCV_HAL_IMPL_NEON_EXTRACT(int8x16, s8)
|
||||||
|
OPENCV_HAL_IMPL_NEON_EXTRACT(uint16x8, u16)
|
||||||
|
OPENCV_HAL_IMPL_NEON_EXTRACT(int16x8, s16)
|
||||||
|
OPENCV_HAL_IMPL_NEON_EXTRACT(uint32x4, u32)
|
||||||
|
OPENCV_HAL_IMPL_NEON_EXTRACT(int32x4, s32)
|
||||||
|
OPENCV_HAL_IMPL_NEON_EXTRACT(uint64x2, u64)
|
||||||
|
OPENCV_HAL_IMPL_NEON_EXTRACT(int64x2, s64)
|
||||||
|
OPENCV_HAL_IMPL_NEON_EXTRACT(float32x4, f32)
|
||||||
|
|
||||||
inline v_int32x4 v_round(const v_float32x4& a)
|
inline v_int32x4 v_round(const v_float32x4& a)
|
||||||
{
|
{
|
||||||
static const int32x4_t v_sign = vdupq_n_s32(1 << 31),
|
static const int32x4_t v_sign = vdupq_n_s32(1 << 31),
|
||||||
|
@ -1149,6 +1149,17 @@ OPENCV_HAL_IMPL_SSE_UNPACKS(v_int32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
|
|||||||
OPENCV_HAL_IMPL_SSE_UNPACKS(v_float32x4, ps, _mm_castps_si128, _mm_castsi128_ps)
|
OPENCV_HAL_IMPL_SSE_UNPACKS(v_float32x4, ps, _mm_castps_si128, _mm_castsi128_ps)
|
||||||
OPENCV_HAL_IMPL_SSE_UNPACKS(v_float64x2, pd, _mm_castpd_si128, _mm_castsi128_pd)
|
OPENCV_HAL_IMPL_SSE_UNPACKS(v_float64x2, pd, _mm_castpd_si128, _mm_castsi128_pd)
|
||||||
|
|
||||||
|
template<int s, typename _Tpvec>
|
||||||
|
inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
|
||||||
|
{
|
||||||
|
const int w = sizeof(typename _Tpvec::lane_type);
|
||||||
|
const int n = _Tpvec::nlanes;
|
||||||
|
__m128i ra, rb;
|
||||||
|
ra = _mm_srli_si128(a.val, s*w);
|
||||||
|
rb = _mm_slli_si128(b.val, (n-s)*w);
|
||||||
|
return _Tpvec(_mm_or_si128(ra, rb));
|
||||||
|
}
|
||||||
|
|
||||||
inline v_int32x4 v_round(const v_float32x4& a)
|
inline v_int32x4 v_round(const v_float32x4& a)
|
||||||
{ return v_int32x4(_mm_cvtps_epi32(a.val)); }
|
{ return v_int32x4(_mm_cvtps_epi32(a.val)); }
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user