Optimize 16x16 dequant and idct
As suggested by Yaowu, simplify the 16x16 dequant and IDCT. In the decoder, after the detokenization step, we know the number of non-zero DCT coefficients (eobs) in a macroblock. The IDCT calculation can be skipped or simplified based on eobs, which improves decoder performance. Change-Id: I9ffa1cb134bcb5a7d64fcf90c81871a96d1b4018
This commit is contained in:
parent
4626faf1e7
commit
6c17c9fae0
@ -32,6 +32,7 @@ void vp9_machine_specific_config(VP9_COMMON *ctx) {
|
||||
rtcd->idct.idct1_scalar_add_8x8 = vp9_dc_only_idct_add_8x8_c;
|
||||
rtcd->idct.ihaar2 = vp9_short_ihaar2x2_c;
|
||||
rtcd->idct.idct16x16 = vp9_short_idct16x16_c;
|
||||
rtcd->idct.idct10_16x16 = vp9_short_idct10_16x16_c;
|
||||
|
||||
rtcd->subpix.eighttap16x16 = vp9_eighttap_predict16x16_c;
|
||||
rtcd->subpix.eighttap8x8 = vp9_eighttap_predict8x8_c;
|
||||
|
@ -50,6 +50,11 @@
|
||||
#endif
|
||||
extern prototype_idct(vp9_idct_idct16x16);
|
||||
|
||||
#ifndef vp9_idct_idct10_16x16
|
||||
#define vp9_idct_idct10_16x16 vp9_short_idct10_16x16_c
|
||||
#endif
|
||||
extern prototype_idct(vp9_idct_idct10_16x16);
|
||||
|
||||
#ifndef vp9_idct_idct8
|
||||
#define vp9_idct_idct8 vp9_short_idct8x8_c
|
||||
#endif
|
||||
@ -133,6 +138,7 @@ typedef struct {
|
||||
vp9_idct_fn_t ihaar2_1;
|
||||
|
||||
vp9_idct_fn_t idct16x16;
|
||||
vp9_idct_fn_t idct10_16x16;
|
||||
} vp9_idct_rtcd_vtable_t;
|
||||
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
|
@ -1502,6 +1502,161 @@ void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) {
|
||||
output[j * 16 + i] = temp_out[j];
|
||||
}
|
||||
}
|
||||
|
||||
/* The following function is called when we know the maximum number of non-zero
|
||||
* dct coefficients is less or equal 10.
|
||||
*/
|
||||
static void butterfly_16x16_idct10_1d(int16_t input[16], int16_t output[16],
|
||||
int last_shift_bits) {
|
||||
int16_t step[16] = {0};
|
||||
int intermediate[16] = {0};
|
||||
int temp1, temp2;
|
||||
int last_rounding = 0;
|
||||
|
||||
if (last_shift_bits > 0)
|
||||
last_rounding = 1 << (last_shift_bits - 1);
|
||||
|
||||
// step 1 and 2
|
||||
step[ 0] = (input[0] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
|
||||
step[ 1] = (input[0] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
|
||||
|
||||
temp1 = (2 * (input[2] * C8) + RIGHT_ROUNDING) >> RIGHT_SHIFT;
|
||||
step[ 4] = (temp1 + INITIAL_ROUNDING) >> INITIAL_SHIFT;
|
||||
step[ 5] = (temp1 + INITIAL_ROUNDING) >> INITIAL_SHIFT;
|
||||
|
||||
// for odd input
|
||||
temp1 = (input[3] * C12 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
|
||||
temp1 *= C8;
|
||||
intermediate[ 8] = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT;
|
||||
|
||||
temp1 = (-input[3] * C4 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
|
||||
temp1 *= C8;
|
||||
intermediate[ 9] = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT;
|
||||
|
||||
step[ 8] = (intermediate[ 8] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
|
||||
step[ 9] = (intermediate[ 9] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
|
||||
step[10] = (-input[1] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
|
||||
step[11] = (input[1] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
|
||||
step[12] = (input[1] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
|
||||
step[13] = (input[1] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
|
||||
step[14] = (intermediate[ 8] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
|
||||
step[15] = (intermediate[ 9] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
|
||||
|
||||
// step 3
|
||||
output[0] = step[ 0];
|
||||
output[1] = step[ 1];
|
||||
output[2] = step[ 1];
|
||||
output[3] = step[ 0];
|
||||
|
||||
temp1 = step[ 4] * C14;
|
||||
output[4] = (temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
|
||||
|
||||
temp1 = step[ 4] * C2;
|
||||
output[7] = (temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
|
||||
|
||||
temp1 = step[ 5] * C10;
|
||||
output[5] = (temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
|
||||
|
||||
temp1 = step[ 5] * C6;
|
||||
output[6] = (temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
|
||||
|
||||
output[8] = step[ 8] + step[11];
|
||||
output[9] = step[ 9] + step[10];
|
||||
output[10] = step[ 9] - step[10];
|
||||
output[11] = step[ 8] - step[11];
|
||||
output[12] = step[12] + step[15];
|
||||
output[13] = step[13] + step[14];
|
||||
output[14] = step[13] - step[14];
|
||||
output[15] = step[12] - step[15];
|
||||
|
||||
// output 4
|
||||
step[ 0] = output[0] + output[7];
|
||||
step[ 1] = output[1] + output[6];
|
||||
step[ 2] = output[2] + output[5];
|
||||
step[ 3] = output[3] + output[4];
|
||||
step[ 4] = output[3] - output[4];
|
||||
step[ 5] = output[2] - output[5];
|
||||
step[ 6] = output[1] - output[6];
|
||||
step[ 7] = output[0] - output[7];
|
||||
|
||||
temp1 = output[8] * C7;
|
||||
temp2 = output[15] * C9;
|
||||
step[ 8] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
|
||||
|
||||
temp1 = output[9] * C11;
|
||||
temp2 = output[14] * C5;
|
||||
step[ 9] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
|
||||
|
||||
temp1 = output[10] * C3;
|
||||
temp2 = output[13] * C13;
|
||||
step[10] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
|
||||
|
||||
temp1 = output[11] * C15;
|
||||
temp2 = output[12] * C1;
|
||||
step[11] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
|
||||
|
||||
temp1 = output[11] * C1;
|
||||
temp2 = output[12] * C15;
|
||||
step[12] = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
|
||||
|
||||
temp1 = output[10] * C13;
|
||||
temp2 = output[13] * C3;
|
||||
step[13] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
|
||||
|
||||
temp1 = output[9] * C5;
|
||||
temp2 = output[14] * C11;
|
||||
step[14] = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
|
||||
|
||||
temp1 = output[8] * C9;
|
||||
temp2 = output[15] * C7;
|
||||
step[15] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
|
||||
|
||||
// step 5
|
||||
output[0] = (step[0] + step[15] + last_rounding) >> last_shift_bits;
|
||||
output[1] = (step[1] + step[14] + last_rounding) >> last_shift_bits;
|
||||
output[2] = (step[2] + step[13] + last_rounding) >> last_shift_bits;
|
||||
output[3] = (step[3] + step[12] + last_rounding) >> last_shift_bits;
|
||||
output[4] = (step[4] + step[11] + last_rounding) >> last_shift_bits;
|
||||
output[5] = (step[5] + step[10] + last_rounding) >> last_shift_bits;
|
||||
output[6] = (step[6] + step[ 9] + last_rounding) >> last_shift_bits;
|
||||
output[7] = (step[7] + step[ 8] + last_rounding) >> last_shift_bits;
|
||||
|
||||
output[15] = (step[0] - step[15] + last_rounding) >> last_shift_bits;
|
||||
output[14] = (step[1] - step[14] + last_rounding) >> last_shift_bits;
|
||||
output[13] = (step[2] - step[13] + last_rounding) >> last_shift_bits;
|
||||
output[12] = (step[3] - step[12] + last_rounding) >> last_shift_bits;
|
||||
output[11] = (step[4] - step[11] + last_rounding) >> last_shift_bits;
|
||||
output[10] = (step[5] - step[10] + last_rounding) >> last_shift_bits;
|
||||
output[9] = (step[6] - step[ 9] + last_rounding) >> last_shift_bits;
|
||||
output[8] = (step[7] - step[ 8] + last_rounding) >> last_shift_bits;
|
||||
}
|
||||
|
||||
/* 16x16 inverse DCT for blocks whose non-zero DCT coefficients all lie in
 * the upper-left 4x4 area.
 *
 * input  - coefficient block; consecutive rows are pitch / 2 int16_t
 *          elements apart
 * output - receives the 16x16 result, written with a fixed row length of 16
 * pitch  - row step of input; it is halved before being used as an element
 *          offset
 */
void vp9_short_idct10_16x16_c(int16_t *input, int16_t *output, int pitch) {
  /* Only the first four coefficient rows can hold non-zero values, so only
   * those rows are transformed and stored. */
  int16_t rows[4 * 16];
  /* Column input vector.  Entries 4..15 correspond to the untransformed
   * (all-zero) rows; they are zeroed once here and never written again. */
  int16_t temp_in[16] = {0};
  int16_t temp_out[16];
  const int short_pitch = pitch >> 1;
  int i, j;

  /* First transform rows. */
  for (i = 0; i < 4; ++i) {
    butterfly_16x16_idct10_1d(input + i * short_pitch, rows + i * 16, 0);
  }

  /* Then transform columns. */
  for (i = 0; i < 16; ++i) {
    for (j = 0; j < 4; ++j) {
      temp_in[j] = rows[j * 16 + i];
    }
    butterfly_16x16_idct10_1d(temp_in, temp_out, 3);
    for (j = 0; j < 16; ++j) {
      output[j * 16 + i] = temp_out[j];
    }
  }
}
|
||||
#undef INITIAL_SHIFT
|
||||
#undef INITIAL_ROUNDING
|
||||
#undef RIGHT_SHIFT
|
||||
|
@ -54,7 +54,7 @@ specialize vp9_dequant_idct_add_y_block_8x8
|
||||
prototype void vp9_dequant_idct_add_uv_block_8x8 "short *q, short *dq, unsigned char *pre, unsigned char *dstu, unsigned char *dstv, int stride, unsigned short *eobs, struct macroblockd *xd"
|
||||
specialize vp9_dequant_idct_add_uv_block_8x8
|
||||
|
||||
prototype void vp9_dequant_idct_add_16x16 "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride"
|
||||
prototype void vp9_dequant_idct_add_16x16 "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, unsigned short eobs"
|
||||
specialize vp9_dequant_idct_add_16x16
|
||||
|
||||
prototype void vp9_dequant_idct_add_8x8 "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride"
|
||||
|
@ -401,7 +401,7 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd,
|
||||
} else {
|
||||
vp9_dequant_idct_add_16x16(xd->qcoeff, xd->block[0].dequant,
|
||||
xd->predictor, xd->dst.y_buffer,
|
||||
16, xd->dst.y_stride);
|
||||
16, xd->dst.y_stride, xd->eobs[0]);
|
||||
}
|
||||
} else if (tx_size == TX_8X8) {
|
||||
#if CONFIG_SUPERBLOCKS
|
||||
|
@ -19,6 +19,28 @@
|
||||
extern int dec_debug;
|
||||
#endif
|
||||
|
||||
/* Reconstruct a width x height block of pixels: add each residual sample to
 * the matching prediction sample, clamp the sum to [0, 255] and store it.
 *
 * diff   - residual samples, stored contiguously with `width` values per row
 * pred   - prediction samples; consecutive rows are `pitch` elements apart
 * pitch  - row step of pred
 * dest   - output pixels; consecutive rows are `stride` elements apart
 * stride - row step of dest
 * width  - number of samples per row
 * height - number of rows
 */
static void recon(const int16_t *diff, const uint8_t *pred, int pitch,
                  uint8_t *dest, int stride, int width, int height) {
  int r, c;

  for (r = 0; r < height; r++) {
    for (c = 0; c < width; c++) {
      int a = diff[c] + pred[c];

      /* Saturate to the 8-bit pixel range. */
      if (a < 0)
        a = 0;
      else if (a > 255)
        a = 255;

      dest[c] = (uint8_t) a;
    }

    /* Each buffer advances by its own row step. */
    dest += stride;
    diff += width;
    pred += pitch;
  }
}
|
||||
|
||||
void vp9_dequantize_b_c(BLOCKD *d) {
|
||||
|
||||
int i;
|
||||
@ -37,7 +59,6 @@ void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, short *dq,
|
||||
int pitch, int stride) {
|
||||
short output[16];
|
||||
short *diff_ptr = output;
|
||||
int r, c;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i++) {
|
||||
@ -48,23 +69,7 @@ void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, short *dq,
|
||||
|
||||
vpx_memset(input, 0, 32);
|
||||
|
||||
for (r = 0; r < 4; r++) {
|
||||
for (c = 0; c < 4; c++) {
|
||||
int a = diff_ptr[c] + pred[c];
|
||||
|
||||
if (a < 0)
|
||||
a = 0;
|
||||
|
||||
if (a > 255)
|
||||
a = 255;
|
||||
|
||||
dest[c] = (unsigned char) a;
|
||||
}
|
||||
|
||||
dest += stride;
|
||||
diff_ptr += 4;
|
||||
pred += pitch;
|
||||
}
|
||||
recon(diff_ptr, pred, pitch, dest, stride, 4, 4);
|
||||
}
|
||||
|
||||
void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input, short *dq,
|
||||
@ -115,7 +120,6 @@ void vp9_dequant_idct_add_c(short *input, short *dq, unsigned char *pred,
|
||||
unsigned char *dest, int pitch, int stride) {
|
||||
short output[16];
|
||||
short *diff_ptr = output;
|
||||
int r, c;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i++) {
|
||||
@ -127,23 +131,7 @@ void vp9_dequant_idct_add_c(short *input, short *dq, unsigned char *pred,
|
||||
|
||||
vpx_memset(input, 0, 32);
|
||||
|
||||
for (r = 0; r < 4; r++) {
|
||||
for (c = 0; c < 4; c++) {
|
||||
int a = diff_ptr[c] + pred[c];
|
||||
|
||||
if (a < 0)
|
||||
a = 0;
|
||||
|
||||
if (a > 255)
|
||||
a = 255;
|
||||
|
||||
dest[c] = (unsigned char) a;
|
||||
}
|
||||
|
||||
dest += stride;
|
||||
diff_ptr += 4;
|
||||
pred += pitch;
|
||||
}
|
||||
recon(diff_ptr, pred, pitch, dest, stride, 4, 4);
|
||||
}
|
||||
|
||||
void vp9_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred,
|
||||
@ -152,7 +140,6 @@ void vp9_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred,
|
||||
int i;
|
||||
short output[16];
|
||||
short *diff_ptr = output;
|
||||
int r, c;
|
||||
|
||||
input[0] = (short)Dc;
|
||||
|
||||
@ -165,23 +152,7 @@ void vp9_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred,
|
||||
|
||||
vpx_memset(input, 0, 32);
|
||||
|
||||
for (r = 0; r < 4; r++) {
|
||||
for (c = 0; c < 4; c++) {
|
||||
int a = diff_ptr[c] + pred[c];
|
||||
|
||||
if (a < 0)
|
||||
a = 0;
|
||||
|
||||
if (a > 255)
|
||||
a = 255;
|
||||
|
||||
dest[c] = (unsigned char) a;
|
||||
}
|
||||
|
||||
dest += stride;
|
||||
diff_ptr += 4;
|
||||
pred += pitch;
|
||||
}
|
||||
recon(diff_ptr, pred, pitch, dest, stride, 4, 4);
|
||||
}
|
||||
|
||||
#if CONFIG_LOSSLESS
|
||||
@ -190,7 +161,6 @@ void vp9_dequant_idct_add_lossless_c(short *input, short *dq,
|
||||
int pitch, int stride) {
|
||||
short output[16];
|
||||
short *diff_ptr = output;
|
||||
int r, c;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i++) {
|
||||
@ -201,23 +171,7 @@ void vp9_dequant_idct_add_lossless_c(short *input, short *dq,
|
||||
|
||||
vpx_memset(input, 0, 32);
|
||||
|
||||
for (r = 0; r < 4; r++) {
|
||||
for (c = 0; c < 4; c++) {
|
||||
int a = diff_ptr[c] + pred[c];
|
||||
|
||||
if (a < 0)
|
||||
a = 0;
|
||||
|
||||
if (a > 255)
|
||||
a = 255;
|
||||
|
||||
dest[c] = (unsigned char) a;
|
||||
}
|
||||
|
||||
dest += stride;
|
||||
diff_ptr += 4;
|
||||
pred += pitch;
|
||||
}
|
||||
recon(diff_ptr, pred, pitch, dest, stride, 4, 4);
|
||||
}
|
||||
|
||||
void vp9_dequant_dc_idct_add_lossless_c(short *input, short *dq,
|
||||
@ -227,7 +181,6 @@ void vp9_dequant_dc_idct_add_lossless_c(short *input, short *dq,
|
||||
int i;
|
||||
short output[16];
|
||||
short *diff_ptr = output;
|
||||
int r, c;
|
||||
|
||||
input[0] = (short)dc;
|
||||
|
||||
@ -238,23 +191,7 @@ void vp9_dequant_dc_idct_add_lossless_c(short *input, short *dq,
|
||||
vp9_short_inv_walsh4x4_x8_c(input, output, 4 << 1);
|
||||
vpx_memset(input, 0, 32);
|
||||
|
||||
for (r = 0; r < 4; r++) {
|
||||
for (c = 0; c < 4; c++) {
|
||||
int a = diff_ptr[c] + pred[c];
|
||||
|
||||
if (a < 0)
|
||||
a = 0;
|
||||
|
||||
if (a > 255)
|
||||
a = 255;
|
||||
|
||||
dest[c] = (unsigned char) a;
|
||||
}
|
||||
|
||||
dest += stride;
|
||||
diff_ptr += 4;
|
||||
pred += pitch;
|
||||
}
|
||||
recon(diff_ptr, pred, pitch, dest, stride, 4, 4);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -461,7 +398,7 @@ void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, short *input, short *dq,
|
||||
int pitch, int stride) {
|
||||
short output[256];
|
||||
short *diff_ptr = output;
|
||||
int r, c, i;
|
||||
int i;
|
||||
|
||||
input[0]= input[0] * dq[0];
|
||||
|
||||
@ -477,55 +414,80 @@ void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, short *input, short *dq,
|
||||
|
||||
vpx_memset(input, 0, 512);
|
||||
|
||||
for (r = 0; r < 16; r++) {
|
||||
for (c = 0; c < 16; c++) {
|
||||
int a = diff_ptr[c] + pred[c];
|
||||
|
||||
if (a < 0)
|
||||
a = 0;
|
||||
else if (a > 255)
|
||||
a = 255;
|
||||
|
||||
dest[c] = (unsigned char) a;
|
||||
}
|
||||
|
||||
dest += stride;
|
||||
diff_ptr += 16;
|
||||
pred += pitch;
|
||||
}
|
||||
recon(diff_ptr, pred, pitch, dest, stride, 16, 16);
|
||||
}
|
||||
|
||||
void vp9_dequant_idct_add_16x16_c(short *input, short *dq, unsigned char *pred,
|
||||
unsigned char *dest, int pitch, int stride) {
|
||||
short output[256];
|
||||
short *diff_ptr = output;
|
||||
void vp9_dequant_idct_add_16x16_c(int16_t *input, int16_t *dq, uint8_t *pred,
|
||||
uint8_t *dest, int pitch, int stride,
|
||||
uint16_t eobs) {
|
||||
int16_t output[256];
|
||||
int16_t *diff_ptr = output;
|
||||
int r, c, i;
|
||||
|
||||
input[0]= input[0] * dq[0];
|
||||
/* The calculation can be simplified if there are not many non-zero dct
|
||||
* coefficients. Use eobs to separate different cases. */
|
||||
if (eobs == 0) {
|
||||
/* All 0 DCT coefficient */
|
||||
vp9_copy_mem16x16(pred, pitch, dest, stride);
|
||||
} else if (eobs == 1) {
|
||||
/* DC only DCT coefficient. */
|
||||
int16_t out;
|
||||
|
||||
// recover quantizer for 4 4x4 blocks
|
||||
for (i = 1; i < 256; i++)
|
||||
input[i] = input[i] * dq[1];
|
||||
out = (input[0] * dq[0] + 2) >> 2;
|
||||
out = (out + 2) >> 2;
|
||||
out = (out + 4) >> 3;
|
||||
|
||||
// the idct halves ( >> 1) the pitch
|
||||
vp9_short_idct16x16_c(input, output, 32);
|
||||
input[0] = 0;
|
||||
|
||||
vpx_memset(input, 0, 512);
|
||||
for (r = 0; r < 16; r++) {
|
||||
for (c = 0; c < 16; c++) {
|
||||
int a = out + pred[c];
|
||||
|
||||
for (r = 0; r < 16; r++) {
|
||||
for (c = 0; c < 16; c++) {
|
||||
int a = diff_ptr[c] + pred[c];
|
||||
if (a < 0)
|
||||
a = 0;
|
||||
else if (a > 255)
|
||||
a = 255;
|
||||
|
||||
if (a < 0)
|
||||
a = 0;
|
||||
else if (a > 255)
|
||||
a = 255;
|
||||
dest[c] = (uint8_t) a;
|
||||
}
|
||||
|
||||
dest[c] = (unsigned char) a;
|
||||
dest += stride;
|
||||
pred += pitch;
|
||||
}
|
||||
|
||||
dest += stride;
|
||||
diff_ptr += 16;
|
||||
pred += pitch;
|
||||
} else if (eobs <= 10) {
|
||||
input[0]= input[0] * dq[0];
|
||||
input[1] = input[1] * dq[1];
|
||||
input[2] = input[2] * dq[1];
|
||||
input[3] = input[3] * dq[1];
|
||||
input[16] = input[16] * dq[1];
|
||||
input[17] = input[17] * dq[1];
|
||||
input[18] = input[18] * dq[1];
|
||||
input[32] = input[32] * dq[1];
|
||||
input[33] = input[33] * dq[1];
|
||||
input[48] = input[48] * dq[1];
|
||||
|
||||
// the idct halves ( >> 1) the pitch
|
||||
vp9_short_idct10_16x16_c(input, output, 32);
|
||||
|
||||
input[0] = input[1] = input[2] = input[3] = 0;
|
||||
input[16] = input[17] = input[18] = 0;
|
||||
input[32] = input[33] = 0;
|
||||
input[48] = 0;
|
||||
|
||||
recon(diff_ptr, pred, pitch, dest, stride, 16, 16);
|
||||
} else {
|
||||
input[0]= input[0] * dq[0];
|
||||
|
||||
// recover quantizer for 4 4x4 blocks
|
||||
for (i = 1; i < 256; i++)
|
||||
input[i] = input[i] * dq[1];
|
||||
|
||||
// the idct halves ( >> 1) the pitch
|
||||
vp9_short_idct16x16_c(input, output, 32);
|
||||
|
||||
vpx_memset(input, 0, 512);
|
||||
|
||||
recon(diff_ptr, pred, pitch, dest, stride, 16, 16);
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user