Add transpose_32bit_4x4() and rename transpose_4x4() for vpx_dsp/x86
Change-Id: Ib57377f6cf6573c04720d3cc5dea4285362b4220
This commit is contained in:
parent
31cb852a90
commit
18e8baa5c0
@ -58,7 +58,7 @@ void vpx_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint16_t *dest,
|
||||
test = _mm_movemask_epi8(temp_mm);
|
||||
|
||||
if (test) {
|
||||
transpose_4x4(inptr);
|
||||
transpose_16bit_4x4(inptr);
|
||||
sign_bits[0] = _mm_cmplt_epi16(inptr[0], zero);
|
||||
sign_bits[1] = _mm_cmplt_epi16(inptr[1], zero);
|
||||
inptr[3] = _mm_unpackhi_epi16(inptr[1], sign_bits[1]);
|
||||
|
@ -77,7 +77,7 @@ void idct4_sse2(__m128i *in) {
|
||||
const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
|
||||
__m128i u[8], v[8];
|
||||
|
||||
transpose_4x4(in);
|
||||
transpose_16bit_4x4(in);
|
||||
// stage 1
|
||||
u[0] = _mm_unpacklo_epi16(in[0], in[1]);
|
||||
u[1] = _mm_unpackhi_epi16(in[0], in[1]);
|
||||
@ -115,7 +115,7 @@ void iadst4_sse2(__m128i *in) {
|
||||
const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
|
||||
__m128i u[8], v[8], in7;
|
||||
|
||||
transpose_4x4(in);
|
||||
transpose_16bit_4x4(in);
|
||||
in7 = _mm_srli_si128(in[1], 8);
|
||||
in7 = _mm_add_epi16(in7, in[0]);
|
||||
in7 = _mm_sub_epi16(in7, in[1]);
|
||||
|
@ -15,7 +15,7 @@
|
||||
#include "vpx_dsp/x86/inv_txfm_sse2.h"
|
||||
#include "vpx_dsp/x86/txfm_common_sse2.h"
|
||||
|
||||
static INLINE void transpose_4x4(__m128i *res) {
|
||||
static INLINE void transpose_16bit_4x4(__m128i *res) {
|
||||
const __m128i tr0_0 = _mm_unpacklo_epi16(res[0], res[1]);
|
||||
const __m128i tr0_1 = _mm_unpackhi_epi16(res[0], res[1]);
|
||||
|
||||
@ -23,4 +23,33 @@ static INLINE void transpose_4x4(__m128i *res) {
|
||||
res[1] = _mm_unpackhi_epi16(tr0_0, tr0_1);
|
||||
}
|
||||
|
||||
static INLINE void transpose_32bit_4x4(__m128i *const a0, __m128i *const a1,
                                       __m128i *const a2, __m128i *const a3) {
  // Transposes, in place, a 4x4 matrix of 32-bit elements held one row per
  // register. Element "rc" below means row r, column c of the input.
  //
  // All four rows are read before anything is written back.
  const __m128i row0 = *a0;  // 00 01 02 03
  const __m128i row1 = *a1;  // 10 11 12 13
  const __m128i row2 = *a2;  // 20 21 22 23
  const __m128i row3 = *a3;  // 30 31 32 33

  // Pass 1: interleave the 32-bit elements of each pair of adjacent rows.
  //   lo01: 00 10 01 11      hi01: 02 12 03 13
  //   lo23: 20 30 21 31      hi23: 22 32 23 33
  const __m128i lo01 = _mm_unpacklo_epi32(row0, row1);
  const __m128i hi01 = _mm_unpackhi_epi32(row0, row1);
  const __m128i lo23 = _mm_unpacklo_epi32(row2, row3);
  const __m128i hi23 = _mm_unpackhi_epi32(row2, row3);

  // Pass 2: interleave 64-bit halves so each output register holds one
  // column of the input:
  //   a0: 00 10 20 30
  //   a1: 01 11 21 31
  //   a2: 02 12 22 32
  //   a3: 03 13 23 33
  *a0 = _mm_unpacklo_epi64(lo01, lo23);
  *a1 = _mm_unpackhi_epi64(lo01, lo23);
  *a2 = _mm_unpacklo_epi64(hi01, hi23);
  *a3 = _mm_unpackhi_epi64(hi01, hi23);
}
|
||||
|
||||
#endif // VPX_DSP_X86_TRANSPOSE_SSE2_H_
|
||||
|
Loading…
x
Reference in New Issue
Block a user