16x16 inverse 2D-DCT with DC only
This commit adds special handling for the 16x16 inverse 2D-DCT when only the DC coefficient is quantized to a non-zero value. Change-Id: I7bf71be7fa13384fab453dc8742b5b50e77a277c
This commit is contained in:
parent
23391ea835
commit
a7c4de22e1
@ -864,10 +864,18 @@ void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest,
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_short_idct1_16x16_c(int16_t *input, int16_t *output) {
|
||||
void vp9_short_idct16x16_1_add_c(int16_t *input, uint8_t *dest,
|
||||
int dest_stride) {
|
||||
int i, j;
|
||||
int a1;
|
||||
int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
|
||||
out = dct_const_round_shift(out * cospi_16_64);
|
||||
output[0] = ROUND_POWER_OF_TWO(out, 6);
|
||||
a1 = ROUND_POWER_OF_TWO(out, 6);
|
||||
for (j = 0; j < 16; ++j) {
|
||||
for (i = 0; i < 16; ++i)
|
||||
dest[i] = clip_pixel(dest[i] + a1);
|
||||
dest += dest_stride;
|
||||
}
|
||||
}
|
||||
|
||||
static void idct32_1d(int16_t *input, int16_t *output) {
|
||||
|
@ -306,15 +306,15 @@ specialize vp9_short_idct8x8_add sse2 neon
|
||||
prototype void vp9_short_idct10_8x8_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_short_idct10_8x8_add sse2
|
||||
|
||||
prototype void vp9_short_idct16x16_1_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_short_idct16x16_1_add sse2
|
||||
|
||||
prototype void vp9_short_idct16x16_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_short_idct16x16_add sse2
|
||||
|
||||
prototype void vp9_short_idct10_16x16_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_short_idct10_16x16_add sse2
|
||||
|
||||
prototype void vp9_short_idct1_16x16 "int16_t *input, int16_t *output"
|
||||
specialize vp9_short_idct1_16x16
|
||||
|
||||
prototype void vp9_short_idct32x32_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_short_idct32x32_add sse2
|
||||
|
||||
|
@ -1470,6 +1470,38 @@ void vp9_short_idct16x16_add_sse2(int16_t *input, uint8_t *dest, int stride) {
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_short_idct16x16_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {
|
||||
__m128i dc_value;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
int a, i;
|
||||
|
||||
a = dct_const_round_shift(input[0] * cospi_16_64);
|
||||
a = dct_const_round_shift(a * cospi_16_64);
|
||||
a = ROUND_POWER_OF_TWO(a, 6);
|
||||
|
||||
dc_value = _mm_set1_epi16(a);
|
||||
|
||||
for (i = 0; i < 2; ++i) {
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
dest += 8 - (stride * 16);
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) {
|
||||
__m128i tbuf[8];
|
||||
array_transpose_8x8(res0, res0);
|
||||
|
@ -123,14 +123,8 @@ void vp9_idct_add_16x16_c(int16_t *input, uint8_t *dest, int stride, int eob) {
|
||||
if (eob) {
|
||||
if (eob == 1) {
|
||||
/* DC only DCT coefficient. */
|
||||
int16_t in = input[0];
|
||||
int16_t out;
|
||||
/* Note: the idct1 will need to be modified accordingly whenever
|
||||
* vp9_short_idct16x16() is modified. */
|
||||
vp9_short_idct1_16x16_c(&in, &out);
|
||||
vp9_short_idct16x16_1_add(input, dest, stride);
|
||||
input[0] = 0;
|
||||
|
||||
vp9_add_constant_residual_16x16(out, dest, stride);
|
||||
} else if (eob <= 10) {
|
||||
vp9_short_idct10_16x16_add(input, dest, stride);
|
||||
vpx_memset(input, 0, 512);
|
||||
|
@ -61,7 +61,9 @@ static void inverse_transform_b_8x8_add(MACROBLOCKD *xd, int eob,
|
||||
static void inverse_transform_b_16x16_add(MACROBLOCKD *xd, int eob,
|
||||
int16_t *dqcoeff, uint8_t *dest,
|
||||
int stride) {
|
||||
if (eob <= 10)
|
||||
if (eob <= 1)
|
||||
vp9_short_idct16x16_1_add(dqcoeff, dest, stride);
|
||||
else if (eob <= 10)
|
||||
vp9_short_idct10_16x16_add(dqcoeff, dest, stride);
|
||||
else
|
||||
vp9_short_idct16x16_add(dqcoeff, dest, stride);
|
||||
|
Loading…
x
Reference in New Issue
Block a user