Remove usage of predict buffer for decode
Instead of using the predict buffer, the decoder now writes the predictor into the recon buffer. For blocks with eob=0, unnecessary idcts can be eliminated. This gave a performance boost of ~1.8% for the HD clips used. Tero: Added needed changes to ARM side and scheduled some assembly code to prevent interlocks. Patch Set 6: Merged (I1bcdca7a95aacc3a181b9faa6b10e3a71ee24df3) into this commit because of similarities in the idct functions. Patch Set 7: EC bug fix. Change-Id: Ie31d90b5d3522e1108163f2ac491e455e3f955e6
This commit is contained in:
@@ -138,11 +138,11 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
|
||||
/* do prediction */
|
||||
if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
|
||||
{
|
||||
vp8mt_build_intra_predictors_mbuv(pbi, xd, mb_row, mb_col);
|
||||
vp8mt_build_intra_predictors_mbuv_s(pbi, xd, mb_row, mb_col);
|
||||
|
||||
if (xd->mode_info_context->mbmi.mode != B_PRED)
|
||||
{
|
||||
vp8mt_build_intra_predictors_mby(pbi, xd, mb_row, mb_col);
|
||||
vp8mt_build_intra_predictors_mby_s(pbi, xd, mb_row, mb_col);
|
||||
} else {
|
||||
vp8mt_intra_prediction_down_copy(pbi, xd, mb_row, mb_col);
|
||||
}
|
||||
@@ -201,7 +201,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
|
||||
|
||||
DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block)
|
||||
(xd->qcoeff, xd->block[0].dequant,
|
||||
xd->predictor, xd->dst.y_buffer,
|
||||
xd->dst.y_buffer,
|
||||
xd->dst.y_stride, xd->eobs, xd->block[24].diff);
|
||||
}
|
||||
else if (xd->mode_info_context->mbmi.mode == B_PRED)
|
||||
@@ -211,19 +211,21 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
|
||||
BLOCKD *b = &xd->block[i];
|
||||
int b_mode = xd->mode_info_context->bmi[i].as_mode;
|
||||
|
||||
vp8mt_predict_intra4x4(pbi, xd, b_mode, b->predictor, mb_row, mb_col, i);
|
||||
vp8mt_predict_intra4x4(pbi, xd, b_mode, *(b->base_dst) + b->dst,
|
||||
b->dst_stride, mb_row, mb_col, i);
|
||||
|
||||
if (xd->eobs[i] > 1)
|
||||
{
|
||||
DEQUANT_INVOKE(&pbi->dequant, idct_add)
|
||||
(b->qcoeff, b->dequant, b->predictor,
|
||||
*(b->base_dst) + b->dst, 16, b->dst_stride);
|
||||
(b->qcoeff, b->dequant,
|
||||
*(b->base_dst) + b->dst, b->dst_stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
|
||||
(b->qcoeff[0] * b->dequant[0], b->predictor,
|
||||
*(b->base_dst) + b->dst, 16, b->dst_stride);
|
||||
(b->qcoeff[0] * b->dequant[0],
|
||||
*(b->base_dst) + b->dst, b->dst_stride,
|
||||
*(b->base_dst) + b->dst, b->dst_stride);
|
||||
((int *)b->qcoeff)[0] = 0;
|
||||
}
|
||||
}
|
||||
@@ -232,13 +234,13 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
|
||||
{
|
||||
DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
|
||||
(xd->qcoeff, xd->block[0].dequant,
|
||||
xd->predictor, xd->dst.y_buffer,
|
||||
xd->dst.y_buffer,
|
||||
xd->dst.y_stride, xd->eobs);
|
||||
}
|
||||
|
||||
DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
|
||||
(xd->qcoeff+16*16, xd->block[16].dequant,
|
||||
xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer,
|
||||
xd->dst.u_buffer, xd->dst.v_buffer,
|
||||
xd->dst.uv_stride, xd->eobs+16);
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user