Remove usage of predict buffer for decode

Instead of using the predict buffer, the decoder now writes
the predictor into the recon buffer.  For blocks with eob=0,
unnecessary idcts can be eliminated.  This gave a performance
boost of ~1.8% for the HD clips used.

Tero: Added needed changes to ARM side and scheduled some
      assembly code to prevent interlocks.

Patch Set 6:  Merged (I1bcdca7a95aacc3a181b9faa6b10e3a71ee24df3)
into this commit because of similarities in the idct
functions.
Patch Set 7: EC bug fix.

Change-Id: Ie31d90b5d3522e1108163f2ac491e455e3f955e6
This commit is contained in:
Scott LaVarnway
2011-10-18 12:06:50 -04:00
parent 6505adf271
commit ed9c66f584
56 changed files with 1496 additions and 2455 deletions

View File

@@ -631,7 +631,7 @@ static int rd_pick_intra4x4block(
rate = bmode_costs[mode];
RECON_INVOKE(&cpi->rtcd.common->recon, intra4x4_predict)
(b, mode, b->predictor);
(b, mode, b->predictor, 16);
ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), subb)(be, b, 16);
x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
x->quantize_b(be, b);
@@ -660,8 +660,8 @@ static int rd_pick_intra4x4block(
}
b->bmi.as_mode = (B_PREDICTION_MODE)(*best_mode);
IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(best_dqcoeff, b->diff, 32);
RECON_INVOKE(IF_RTCD(&cpi->rtcd.common->recon), recon)(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(best_dqcoeff,
best_predictor, 16, *(b->base_dst) + b->dst, b->dst_stride);
return best_rd;
}