diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 71854a7f0..6e32198d8 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -204,15 +204,9 @@ static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd, BOOL_DECODER* const bc) { const TX_TYPE tx_type = get_tx_type_16x16(xd, 0); - if (tx_type != DCT_DCT) { - vp9_dequant_iht_add_16x16_c(tx_type, xd->plane[0].qcoeff, - xd->block[0].dequant, xd->dst.y_buffer, - xd->dst.y_stride, xd->plane[0].eobs[0]); - } else { - vp9_dequant_idct_add_16x16(xd->plane[0].qcoeff, xd->block[0].dequant, - xd->dst.y_buffer, xd->dst.y_stride, - xd->plane[0].eobs[0]); - } + vp9_dequant_iht_add_16x16_c(tx_type, xd->plane[0].qcoeff, + xd->block[0].dequant, xd->dst.y_buffer, + xd->dst.y_stride, xd->plane[0].eobs[0]); vp9_dequant_idct_add_8x8(xd->plane[1].qcoeff, xd->block[16].dequant, xd->dst.u_buffer, xd->dst.uv_stride, @@ -244,12 +238,8 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_intra8x8_predict(xd, b, i8x8mode, dst, stride); } tx_type = get_tx_type_8x8(xd, ib); - if (tx_type != DCT_DCT) { - vp9_dequant_iht_add_8x8_c(tx_type, q, dq, dst, stride, - xd->plane[0].eobs[idx]); - } else { - vp9_dequant_idct_add_8x8(q, dq, dst, stride, xd->plane[0].eobs[idx]); - } + vp9_dequant_iht_add_8x8_c(tx_type, q, dq, dst, stride, + xd->plane[0].eobs[idx]); } } else { vp9_dequant_idct_add_y_block_8x8(xd->plane[0].qcoeff, @@ -295,6 +285,22 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, } } +static INLINE void dequant_add_y(MACROBLOCKD *xd, TX_TYPE tx_type, int idx) { + BLOCKD *const b = &xd->block[idx]; + struct mb_plane *const y = &xd->plane[0]; + if (tx_type != DCT_DCT) { + vp9_dequant_iht_add_c(tx_type, + BLOCK_OFFSET(y->qcoeff, idx, 16), + b->dequant, *(b->base_dst) + b->dst, + b->dst_stride, y->eobs[idx]); + } else { + xd->itxm_add(BLOCK_OFFSET(y->qcoeff, idx, 16), + b->dequant, *(b->base_dst) + b->dst, + b->dst_stride, y->eobs[idx]); + } +} + + static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, BOOL_DECODER* const bc) { TX_TYPE tx_type; @@ -310,19 +316,8 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_intra8x8_predict(xd, b, i8x8mode, *(b->base_dst) + b->dst, b->dst_stride); for (j = 0; j < 4; j++) { - b = &xd->block[ib + iblock[j]]; tx_type = get_tx_type_4x4(xd, ib + iblock[j]); - if (tx_type != DCT_DCT) { - vp9_dequant_iht_add_c(tx_type, - BLOCK_OFFSET(xd->plane[0].qcoeff, ib + iblock[j], 16), - b->dequant, *(b->base_dst) + b->dst, - b->dst_stride, - xd->plane[0].eobs[ib + iblock[j]]); - } else { - xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, ib + iblock[j], 16), - b->dequant, *(b->base_dst) + b->dst, b->dst_stride, - xd->plane[0].eobs[ib + iblock[j]]); - } + dequant_add_y(xd, tx_type, ib + iblock[j]); } b = &xd->block[16 + i]; vp9_intra_uv4x4_predict(xd, b, i8x8mode, *(b->base_dst) + b->dst, @@ -350,16 +345,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_intra4x4_predict(xd, b, b_mode, *(b->base_dst) + b->dst, b->dst_stride); tx_type = get_tx_type_4x4(xd, i); - if (tx_type != DCT_DCT) { - vp9_dequant_iht_add_c(tx_type, - BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16), - b->dequant, *(b->base_dst) + b->dst, - b->dst_stride, xd->plane[0].eobs[i]); - } else { - xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16), - b->dequant, *(b->base_dst) + b->dst, b->dst_stride, - xd->plane[0].eobs[i]); - } + dequant_add_y(xd, tx_type, i); } #if CONFIG_NEWBINTRAMODES if (!xd->mode_info_context->mbmi.mb_skip_coeff) @@ -380,18 +366,8 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->dst.v_buffer, xd->dst.uv_stride, xd->plane[2].eobs); } else { for (i = 0; i < 16; i++) { - BLOCKD *b = &xd->block[i]; tx_type = get_tx_type_4x4(xd, i); - if (tx_type != DCT_DCT) { - vp9_dequant_iht_add_c(tx_type, - BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16), - b->dequant, *(b->base_dst) + b->dst, - b->dst_stride, xd->plane[0].eobs[i]); - } else { - xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16), - b->dequant, *(b->base_dst) + b->dst, b->dst_stride, - xd->plane[0].eobs[i]); - } + dequant_add_y(xd, tx_type, i); } xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant, xd->dst.u_buffer, xd->dst.uv_stride, @@ -451,19 +427,12 @@ static INLINE void decode_sby_16x16(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) { const int y_offset = (y_idx * 16) * mb->dst.y_stride + (x_idx * 16); const TX_TYPE tx_type = get_tx_type_16x16(mb, (y_idx * (4 * bw) + x_idx) * 4); - if (tx_type == DCT_DCT) { - vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[0].qcoeff, n, 256), - mb->block[0].dequant , - mb->dst.y_buffer + y_offset, mb->dst.y_stride, - mb->plane[0].eobs[n * 16]); - } else { - vp9_dequant_iht_add_16x16_c(tx_type, - BLOCK_OFFSET(mb->plane[0].qcoeff, n, 256), - mb->block[0].dequant, - mb->dst.y_buffer + y_offset, - mb->dst.y_stride, - mb->plane[0].eobs[n * 16]); - } + vp9_dequant_iht_add_16x16_c(tx_type, + BLOCK_OFFSET(mb->plane[0].qcoeff, n, 256), + mb->block[0].dequant, + mb->dst.y_buffer + y_offset, + mb->dst.y_stride, + mb->plane[0].eobs[n * 16]); } } @@ -503,18 +472,12 @@ static INLINE void decode_sby_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { const int y_offset = (y_idx * 8) * xd->dst.y_stride + (x_idx * 8); const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * (2 * bw) + x_idx) * 2); - if (tx_type == DCT_DCT) { - vp9_dequant_idct_add_8x8(BLOCK_OFFSET(xd->plane[0].qcoeff, n, 64), - xd->block[0].dequant, - xd->dst.y_buffer + y_offset, - xd->dst.y_stride, xd->plane[0].eobs[n * 4]); - } else { - vp9_dequant_iht_add_8x8_c(tx_type, - BLOCK_OFFSET(xd->plane[0].qcoeff, n, 64), - xd->block[0].dequant, - xd->dst.y_buffer + y_offset, xd->dst.y_stride, - xd->plane[0].eobs[n * 4]); - } + + vp9_dequant_iht_add_8x8_c(tx_type, + BLOCK_OFFSET(xd->plane[0].qcoeff, n, 64), + xd->block[0].dequant, + xd->dst.y_buffer + y_offset, xd->dst.y_stride, + xd->plane[0].eobs[n * 4]); } } diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c index c88f61be2..09302014e 100644 --- a/vp9/decoder/vp9_dequantize.c +++ b/vp9/decoder/vp9_dequantize.c @@ -75,32 +75,40 @@ void vp9_add_constant_residual_32x32_c(const int16_t diff, uint8_t *dest, void vp9_dequant_iht_add_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq, uint8_t *dest, int stride, int eob) { - int i; - DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16); - for (i = 0; i < 16; i++) - input[i] *= dq[i]; + if (tx_type == DCT_DCT) { + vp9_dequant_idct_add(input, dq, dest, stride, eob); + } else { + int i; + DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16); - vp9_short_iht4x4(input, output, 4, tx_type); - vpx_memset(input, 0, 32); - vp9_add_residual_4x4(output, dest, stride); + for (i = 0; i < 16; i++) + input[i] *= dq[i]; + + vp9_short_iht4x4(input, output, 4, tx_type); + vpx_memset(input, 0, 32); + vp9_add_residual_4x4(output, dest, stride); + } } void vp9_dequant_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq, uint8_t *dest, int stride, int eob) { - DECLARE_ALIGNED_ARRAY(16, int16_t, output, 64); + if (tx_type == DCT_DCT) { + vp9_dequant_idct_add_8x8(input, dq, dest, stride, eob); + } else { + if (eob > 0) { + int i; + DECLARE_ALIGNED_ARRAY(16, int16_t, output, 64); - if (eob > 0) { - int i; + input[0] *= dq[0]; + for (i = 1; i < 64; i++) + input[i] *= dq[1]; - input[0] *= dq[0]; - for (i = 1; i < 64; i++) - input[i] *= dq[1]; - - vp9_short_iht8x8(input, output, 8, tx_type); - vpx_memset(input, 0, 128); - vp9_add_residual_8x8(output, dest, stride); + vp9_short_iht8x8(input, output, 8, tx_type); + vpx_memset(input, 0, 128); + vp9_add_residual_8x8(output, dest, stride); + } } } @@ -236,26 +244,22 @@ void vp9_dequant_iht_add_16x16_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq, uint8_t *dest, int stride, int eob) { - DECLARE_ALIGNED_ARRAY(16, int16_t, output, 256); + if (tx_type == DCT_DCT) { + vp9_dequant_idct_add_16x16(input, dq, dest, stride, eob); + } else { + DECLARE_ALIGNED_ARRAY(16, int16_t, output, 256); - if (eob > 0) { - int i; + if (eob > 0) { + int i; - input[0] *= dq[0]; + input[0] *= dq[0]; + for (i = 1; i < 256; i++) + input[i] *= dq[1]; - // recover quantizer for 4 4x4 blocks - for (i = 1; i < 256; i++) - input[i] *= dq[1]; - - // inverse hybrid transform - vp9_short_iht16x16(input, output, 16, tx_type); - - // the idct halves ( >> 1) the pitch - // vp9_short_idct16x16(input, output, 32); - - vpx_memset(input, 0, 512); - - vp9_add_residual_16x16(output, dest, stride); + vp9_short_iht16x16(input, output, 16, tx_type); + vpx_memset(input, 0, 512); + vp9_add_residual_16x16(output, dest, stride); + } } }