diff --git a/modules/hal/include/opencv2/hal/intrin_cpp.hpp b/modules/hal/include/opencv2/hal/intrin_cpp.hpp index d0d5b28a2..e0140a863 100644 --- a/modules/hal/include/opencv2/hal/intrin_cpp.hpp +++ b/modules/hal/include/opencv2/hal/intrin_cpp.hpp @@ -735,11 +735,11 @@ OPENCV_HAL_IMPL_C_INIT(v_uint64x2, uint64, u64) OPENCV_HAL_IMPL_C_INIT(v_uint64x2, int64, s64) #define OPENCV_HAL_IMPL_C_SHIFT(_Tpvec, _Tp) \ -template inline _Tpvec v_lshift(const _Tpvec& a) \ +template inline _Tpvec v_shl(const _Tpvec& a) \ { return a << n; } \ -template inline _Tpvec v_rshift(const _Tpvec& a) \ +template inline _Tpvec v_shr(const _Tpvec& a) \ { return a >> n; } \ -template inline _Tpvec v_rshift_round(const _Tpvec& a) \ +template inline _Tpvec v_rshr(const _Tpvec& a) \ { \ _Tpvec c; \ for( int i = 0; i < _Tpvec::nlanes; i++ ) \ @@ -766,7 +766,7 @@ inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ } \ return c; \ } \ -template inline _Tpnvec v_rshift_round_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ +template inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ { \ _Tpnvec c; \ for( int i = 0; i < _Tpvec::nlanes; i++ ) \ @@ -781,7 +781,7 @@ inline void v_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ for( int i = 0; i < _Tpvec::nlanes; i++ ) \ ptr[i] = saturate_cast<_Tpn>(a.s[i]); \ } \ -template inline void v_rshift_round_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ +template inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ { \ for( int i = 0; i < _Tpvec::nlanes; i++ ) \ ptr[i] = saturate_cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ diff --git a/modules/hal/include/opencv2/hal/intrin_neon.hpp b/modules/hal/include/opencv2/hal/intrin_neon.hpp index 4bda95db6..ab6aa8631 100644 --- a/modules/hal/include/opencv2/hal/intrin_neon.hpp +++ b/modules/hal/include/opencv2/hal/intrin_neon.hpp @@ -264,14 +264,14 @@ inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a) \ vst1_##suffix(ptr, a1); \ } \ template inline \ -_Tpvec v_rshift_round_##pack(const _Tpwvec& a, const _Tpwvec& b) \ +_Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b) \ { \ hreg a1 = vqrshr##op##_n_##wsuffix(a.val, n); \ hreg b1 = vqrshr##op##_n_##wsuffix(b.val, n); \ return _Tpvec(vcombine_##suffix(a1, b1)); \ } \ template inline \ -void v_rshift_round_##pack##_store(_Tp* ptr, const _Tpwvec& a) \ +void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a) \ { \ hreg a1 = vqrshr##op##_n_##wsuffix(a.val, n); \ vst1_##suffix(ptr, a1); \ @@ -283,7 +283,7 @@ OPENCV_HAL_IMPL_NEON_PACK(v_int8x16, schar, int8x8_t, s8, v_int16x8, s16, pack, OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_uint32x4, u32, pack, n) OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_int32x4, s32, pack_u, un) OPENCV_HAL_IMPL_NEON_PACK(v_int16x8, short, int16x4_t, s16, v_int32x4, s32, pack, n) -OPENCV_HAL_IMPL_NEON_PACK(v_uint32x4, unsigned, uint32x2_t, u32, v_uint64x2, u32, pack, n) +OPENCV_HAL_IMPL_NEON_PACK(v_uint32x4, unsigned, uint32x2_t, u32, v_uint64x2, u64, pack, n) OPENCV_HAL_IMPL_NEON_PACK(v_int32x4, int, int32x2_t, s32, v_int64x2, s64, pack, n) inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, @@ -516,11 +516,11 @@ inline _Tpvec operator << (const _Tpvec& a, int n) \ { return _Tpvec(vshlq_##suffix(a.val, vdupq_n_##ssuffix((_Tps)n))); } \ inline _Tpvec operator >> (const _Tpvec& a, int n) \ { return _Tpvec(vshlq_##suffix(a.val, vdupq_n_##ssuffix((_Tps)-n))); } \ -template inline _Tpvec lshift(const _Tpvec& a) \ +template inline _Tpvec v_shl(const _Tpvec& a) \ { return _Tpvec(vshlq_n_##suffix(a.val, n)); } \ -template inline _Tpvec rshift(const _Tpvec& a) \ +template inline _Tpvec v_shr(const _Tpvec& a) \ { return _Tpvec(vshrq_n_##suffix(a.val, n)); } \ -template inline _Tpvec rshift_round(const _Tpvec& a) \ +template inline _Tpvec v_rshr(const _Tpvec& a) \ { return _Tpvec(vrshrq_n_##suffix(a.val, n)); } OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint8x16, u8, schar, s8) diff --git a/modules/hal/include/opencv2/hal/intrin_sse.hpp b/modules/hal/include/opencv2/hal/intrin_sse.hpp index 12f5789ea..3b77a1154 100644 --- a/modules/hal/include/opencv2/hal/intrin_sse.hpp +++ b/modules/hal/include/opencv2/hal/intrin_sse.hpp @@ -318,7 +318,7 @@ inline void v_pack_u_store(uchar* ptr, const v_int16x8& a) { _mm_storel_epi64((__m128i*)ptr, _mm_packus_epi16(a.val, a.val)); } template inline -v_uint8x16 v_rshift_round_pack(const v_uint16x8& a, const v_uint16x8& b) +v_uint8x16 v_rshr_pack(const v_uint16x8& a, const v_uint16x8& b) { // we assume that n > 0, and so the shifted 16-bit values can be treated as signed numbers. __m128i delta = _mm_set1_epi16((short)(1 << (n-1))); @@ -327,7 +327,7 @@ v_uint8x16 v_rshift_round_pack(const v_uint16x8& a, const v_uint16x8& b) } template inline -void v_rshift_round_pack_store(uchar* ptr, const v_uint16x8& a) +void v_rshr_pack_store(uchar* ptr, const v_uint16x8& a) { __m128i delta = _mm_set1_epi16((short)(1 << (n-1))); __m128i a1 = _mm_srli_epi16(_mm_adds_epu16(a.val, delta), n); @@ -335,7 +335,7 @@ void v_rshift_round_pack_store(uchar* ptr, const v_uint16x8& a) } template inline -v_uint8x16 v_rshift_round_pack_u(const v_int16x8& a, const v_int16x8& b) +v_uint8x16 v_rshr_pack_u(const v_int16x8& a, const v_int16x8& b) { __m128i delta = _mm_set1_epi16((short)(1 << (n-1))); return v_uint8x16(_mm_packus_epi16(_mm_srai_epi16(_mm_adds_epi16(a.val, delta), n), @@ -343,7 +343,7 @@ v_uint8x16 v_rshift_round_pack_u(const v_int16x8& a, const v_int16x8& b) } template inline -void v_rshift_round_pack_u_store(uchar* ptr, const v_int16x8& a) +void v_rshr_pack_u_store(uchar* ptr, const v_int16x8& a) { __m128i delta = _mm_set1_epi16((short)(1 << (n-1))); __m128i a1 = _mm_srai_epi16(_mm_adds_epi16(a.val, delta), n); @@ -357,7 +357,7 @@ inline void v_pack_store(schar* ptr, v_int16x8& a) { _mm_storel_epi64((__m128i*)ptr, _mm_packs_epi16(a.val, a.val)); } template inline -v_int8x16 v_rshift_round_pack(const v_int16x8& a, const v_int16x8& b) +v_int8x16 v_rshr_pack(const v_int16x8& a, const v_int16x8& b) { // we assume that n > 0, and so the shifted 16-bit values can be treated as signed numbers. __m128i delta = _mm_set1_epi16((short)(1 << (n-1))); @@ -365,7 +365,7 @@ v_int8x16 v_rshift_round_pack(const v_int16x8& a, const v_int16x8& b) _mm_srai_epi16(_mm_adds_epi16(b.val, delta), n))); } template inline -void v_rshift_round_pack_store(schar* ptr, const v_int16x8& a) +void v_rshr_pack_store(schar* ptr, const v_int16x8& a) { // we assume that n > 0, and so the shifted 16-bit values can be treated as signed numbers. __m128i delta = _mm_set1_epi16((short)(1 << (n-1))); @@ -398,7 +398,7 @@ inline void v_pack_store(ushort* ptr, const v_uint32x4& a) } template inline -v_uint16x8 v_rshift_round_pack(const v_uint32x4& a, const v_uint32x4& b) +v_uint16x8 v_rshr_pack(const v_uint32x4& a, const v_uint32x4& b) { __m128i delta = _mm_set1_epi32(1 << (n-1)), delta32 = _mm_set1_epi32(32768); __m128i a1 = _mm_sub_epi32(_mm_srli_epi32(_mm_add_epi32(a.val, delta), n), delta32); @@ -407,7 +407,7 @@ v_uint16x8 v_rshift_round_pack(const v_uint32x4& a, const v_uint32x4& b) } template inline -void v_rshift_round_pack_store(ushort* ptr, const v_uint32x4& a) +void v_rshr_pack_store(ushort* ptr, const v_uint32x4& a) { __m128i delta = _mm_set1_epi32(1 << (n-1)), delta32 = _mm_set1_epi32(32768); __m128i a1 = _mm_sub_epi32(_mm_srli_epi32(_mm_add_epi32(a.val, delta), n), delta32); @@ -431,7 +431,7 @@ inline void v_pack_u_store(ushort* ptr, const v_int32x4& a) } template inline -void v_rshift_round_pack_u_store(ushort* ptr, const v_int32x4& a) +void v_rshr_pack_u_store(ushort* ptr, const v_int32x4& a) { __m128i delta = _mm_set1_epi32(1 << (n-1)), delta32 = _mm_set1_epi32(32768); __m128i a1 = _mm_sub_epi32(_mm_srai_epi32(_mm_add_epi32(a.val, delta), n), delta32); @@ -448,7 +448,7 @@ inline void v_pack_store(short* ptr, const v_int32x4& a) } template inline -v_int16x8 v_rshift_round_pack(const v_int32x4& a, const v_int32x4& b) +v_int16x8 v_rshr_pack(const v_int32x4& a, const v_int32x4& b) { __m128i delta = _mm_set1_epi32(1 << (n-1)); return v_int16x8(_mm_packs_epi32(_mm_srai_epi32(_mm_add_epi32(a.val, delta), n), @@ -456,7 +456,7 @@ v_int16x8 v_rshift_round_pack(const v_int32x4& a, const v_int32x4& b) } template inline -void v_rshift_round_pack_store(short* ptr, const v_int32x4& a) +void v_rshr_pack_store(short* ptr, const v_int32x4& a) { __m128i delta = _mm_set1_epi32(1 << (n-1)); __m128i a1 = _mm_srai_epi32(_mm_add_epi32(a.val, delta), n); @@ -493,19 +493,19 @@ inline void v_pack_store(int* ptr, const v_int64x2& a) } template inline -v_uint32x4 v_rshift_round_pack(const v_uint64x2& a, const v_uint64x2& b) +v_uint32x4 v_rshr_pack(const v_uint64x2& a, const v_uint64x2& b) { uint64 delta = (uint64)1 << (n-1); v_uint64x2 delta2(delta, delta); - __m128 a1 = _mm_srli_epi64(_mm_add_epi64(a.val, delta2.val), n); - __m128 b1 = _mm_srli_epi64(_mm_add_epi64(b.val, delta2.val), n); + __m128i a1 = _mm_srli_epi64(_mm_add_epi64(a.val, delta2.val), n); + __m128i b1 = _mm_srli_epi64(_mm_add_epi64(b.val, delta2.val), n); __m128i v0 = _mm_unpacklo_epi32(a1, b1); // a0 a1 0 0 __m128i v1 = _mm_unpackhi_epi32(a1, b1); // b0 b1 0 0 return v_uint32x4(_mm_unpacklo_epi64(v0, v1)); } template inline -void v_rshift_round_pack_store(unsigned* ptr, const v_uint64x2& a) +void v_rshr_pack_store(unsigned* ptr, const v_uint64x2& a) { uint64 delta = (uint64)1 << (n-1); v_uint64x2 delta2(delta, delta); @@ -526,7 +526,7 @@ inline __m128i v_srai_epi64(__m128i a, int imm) } template inline -v_int32x4 v_rshift_round_pack(const v_int64x2& a, const v_int64x2& b) +v_int32x4 v_rshr_pack(const v_int64x2& a, const v_int64x2& b) { int64 delta = (int64)1 << (n-1); v_int64x2 delta2(delta, delta); @@ -538,7 +538,7 @@ v_int32x4 v_rshift_round_pack(const v_int64x2& a, const v_int64x2& b) } template inline -void v_rshift_round_pack_store(int* ptr, const v_int64x2& a) +void v_rshr_pack_store(int* ptr, const v_int64x2& a) { int64 delta = (int64)1 << (n-1); v_int64x2 delta2(delta, delta); @@ -901,22 +901,22 @@ inline _Tpsvec operator >> (const _Tpsvec& a, int imm) \ return _Tpsvec(srai(a.val, imm)); \ } \ template \ -inline _Tpuvec v_lshift(const _Tpuvec& a) \ +inline _Tpuvec v_shl(const _Tpuvec& a) \ { \ return _Tpuvec(_mm_slli_##suffix(a.val, imm)); \ } \ template \ -inline _Tpsvec v_lshift(const _Tpsvec& a) \ +inline _Tpsvec v_shl(const _Tpsvec& a) \ { \ return _Tpsvec(_mm_slli_##suffix(a.val, imm)); \ } \ template \ -inline _Tpuvec v_rshift(const _Tpuvec& a) \ +inline _Tpuvec v_shr(const _Tpuvec& a) \ { \ return _Tpuvec(_mm_srli_##suffix(a.val, imm)); \ } \ template \ -inline _Tpsvec v_rshift(const _Tpsvec& a) \ +inline _Tpsvec v_shr(const _Tpsvec& a) \ { \ return _Tpsvec(srai(a.val, imm)); \ }