Remove usage of predict buffer for decode

Instead of using the predict buffer, the decoder now writes
the predictor into the recon buffer.  For blocks with eob=0,
unnecessary idcts can be eliminated.  This gave a performance
boost of ~1.8% for the HD clips used.

Tero: Added needed changes to ARM side and scheduled some
      assembly code to prevent interlocks.

Patch Set 6:  Merged (I1bcdca7a95aacc3a181b9faa6b10e3a71ee24df3)
into this commit because of similarities in the idct
functions.
Patch Set 7: EC bug fix.

Change-Id: Ie31d90b5d3522e1108163f2ac491e455e3f955e6
This commit is contained in:
Scott LaVarnway
2011-10-18 12:06:50 -04:00
parent 6505adf271
commit ed9c66f584
56 changed files with 1496 additions and 2455 deletions

View File

@@ -138,11 +138,11 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
/* do prediction */
if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
{
vp8mt_build_intra_predictors_mbuv(pbi, xd, mb_row, mb_col);
vp8mt_build_intra_predictors_mbuv_s(pbi, xd, mb_row, mb_col);
if (xd->mode_info_context->mbmi.mode != B_PRED)
{
vp8mt_build_intra_predictors_mby(pbi, xd, mb_row, mb_col);
vp8mt_build_intra_predictors_mby_s(pbi, xd, mb_row, mb_col);
} else {
vp8mt_intra_prediction_down_copy(pbi, xd, mb_row, mb_col);
}
@@ -201,7 +201,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block)
(xd->qcoeff, xd->block[0].dequant,
xd->predictor, xd->dst.y_buffer,
xd->dst.y_buffer,
xd->dst.y_stride, xd->eobs, xd->block[24].diff);
}
else if (xd->mode_info_context->mbmi.mode == B_PRED)
@@ -211,19 +211,21 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
BLOCKD *b = &xd->block[i];
int b_mode = xd->mode_info_context->bmi[i].as_mode;
vp8mt_predict_intra4x4(pbi, xd, b_mode, b->predictor, mb_row, mb_col, i);
vp8mt_predict_intra4x4(pbi, xd, b_mode, *(b->base_dst) + b->dst,
b->dst_stride, mb_row, mb_col, i);
if (xd->eobs[i] > 1)
{
DEQUANT_INVOKE(&pbi->dequant, idct_add)
(b->qcoeff, b->dequant, b->predictor,
*(b->base_dst) + b->dst, 16, b->dst_stride);
(b->qcoeff, b->dequant,
*(b->base_dst) + b->dst, b->dst_stride);
}
else
{
IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
(b->qcoeff[0] * b->dequant[0], b->predictor,
*(b->base_dst) + b->dst, 16, b->dst_stride);
(b->qcoeff[0] * b->dequant[0],
*(b->base_dst) + b->dst, b->dst_stride,
*(b->base_dst) + b->dst, b->dst_stride);
((int *)b->qcoeff)[0] = 0;
}
}
@@ -232,13 +234,13 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
{
DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
(xd->qcoeff, xd->block[0].dequant,
xd->predictor, xd->dst.y_buffer,
xd->dst.y_buffer,
xd->dst.y_stride, xd->eobs);
}
DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
(xd->qcoeff+16*16, xd->block[16].dequant,
xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer,
xd->dst.u_buffer, xd->dst.v_buffer,
xd->dst.uv_stride, xd->eobs+16);
}