Adding SSE2 optimized vp9_short_idct32x32_1_add function.

Change-Id: I4b1c6bb9ff615f5872b96ed07dbf0f5e18e63643
This commit is contained in:
Dmitry Kovalev 2013-10-01 18:34:36 -07:00
parent e83ebc8992
commit 3c4e9e341f
2 changed files with 49 additions and 1 deletions

View File

@ -311,7 +311,7 @@ prototype void vp9_short_idct32x32_add "int16_t *input, uint8_t *dest, int dest_
specialize vp9_short_idct32x32_add sse2 neon
prototype void vp9_short_idct32x32_1_add "int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct32x32_1_add
specialize vp9_short_idct32x32_1_add sse2
prototype void vp9_short_iht4x4_add "int16_t *input, uint8_t *dest, int dest_stride, int tx_type"
specialize vp9_short_iht4x4_add sse2 neon

View File

@ -3549,3 +3549,51 @@ void vp9_short_idct32x32_add_sse2(int16_t *input, uint8_t *dest, int stride) {
}
}
} //NOLINT
// DC-only variant of the 32x32 inverse DCT + add: only input[0] is read.
//
// The coefficient is multiplied by cospi_16_64 and passed through
// dct_const_round_shift() twice, then rounded with ROUND_POWER_OF_TWO(., 6).
// The resulting scalar is replicated into all eight 16-bit lanes of an SSE2
// register and handed to RECON_AND_STORE for every row of the block.
//
// input  - coefficient buffer; only element 0 is used here.
// dest   - destination pixel buffer, updated in place.
// stride - passed to the pointer rewind below; presumably also the per-row
//          step that RECON_AND_STORE applies to dest (macro is defined
//          elsewhere - TODO confirm).
void vp9_short_idct32x32_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {
  // Not referenced directly in this function; presumably picked up by name
  // inside RECON_AND_STORE, so it must keep this exact identifier.
  const __m128i zero = _mm_setzero_si128();
  __m128i dc_lanes;
  int dc;
  int strip, row;

  // Scalar DC computation: two constant multiplies with rounding, then the
  // final 6-bit rounding shift.
  dc = dct_const_round_shift(input[0] * cospi_16_64);
  dc = dct_const_round_shift(dc * cospi_16_64);
  dc = ROUND_POWER_OF_TWO(dc, 6);

  // Broadcast the DC term to eight 16-bit lanes.
  dc_lanes = _mm_set1_epi16(dc);

  // Four passes of 32 RECON_AND_STORE invocations each.
  for (strip = 0; strip < 4; ++strip) {
    for (row = 0; row < 32; ++row) {
      RECON_AND_STORE(dest, dc_lanes);
    }
    // Step 8 bytes forward and 32 strides back. This assumes the macro moved
    // dest down one stride per invocation - verify against its definition.
    dest += 8 - (stride * 32);
  }
}