Re-design all-zero-coeff block index buffer use

Use the zcoeff_blk buffer of PICK_MODE_CONTEXT to store the indexes
of the all-zero-coeff blocks of the current best mode. Remove the
temporary buffer best_zcoeff_blk defined in the rate-distortion
optimization loop. This improves speed performance by about 0.5% in
all speed settings.

Change-Id: Ie3e15988ddfa581eafa2e19a8228d3fe4a46095c
This commit is contained in:
Jingning Han 2013-10-14 16:03:23 -07:00
parent f60a3910c4
commit 8e3ce1a9e3
2 changed files with 6 additions and 21 deletions

View File

@ -26,7 +26,7 @@ typedef struct {
// Structure to hold snapshot of coding context during the mode picking process
typedef struct {
MODE_INFO mic;
unsigned char zcoeff_blk[256];
uint8_t zcoeff_blk[256];
int skip;
int_mv best_ref_mv;
int_mv second_best_ref_mv;
@ -126,7 +126,7 @@ struct macroblock {
int mv_row_min;
int mv_row_max;
unsigned char zcoeff_blk[TX_SIZES][256];
uint8_t zcoeff_blk[TX_SIZES][256];
int skip;
int encode_breakout;

View File

@ -2222,9 +2222,6 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
ctx->comp_pred_diff = (int)comp_pred_diff[COMP_PREDICTION_ONLY];
ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION];
vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[xd->this_mi->mbmi.tx_size],
sizeof(ctx->zcoeff_blk));
vpx_memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
vpx_memcpy(ctx->best_filter_diff, best_filter_diff,
sizeof(*best_filter_diff) * (SWITCHABLE_FILTERS + 1));
@ -3140,11 +3137,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
const int bws = num_8x8_blocks_wide_lookup[bsize] / 2;
const int bhs = num_8x8_blocks_high_lookup[bsize] / 2;
int best_skip2 = 0;
unsigned char best_zcoeff_blk[256] = { 0 };
x->skip_encode = cpi->sf.skip_encode_frame && xd->q_index < QIDX_SKIP_THRESH;
vp9_zero(x->zcoeff_blk);
vp9_zero(ctx->zcoeff_blk);
// Everywhere the flag is set the error is much higher than its neighbors.
ctx->frames_with_high_error = 0;
@ -3575,8 +3569,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
best_rd = this_rd;
best_mbmode = *mbmi;
best_skip2 = this_skip2;
vpx_memcpy(best_zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
sizeof(best_zcoeff_blk));
vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
sizeof(ctx->zcoeff_blk));
// TODO(debargha): enhance this test with a better distortion prediction
// based on qp, activity mask and history
@ -3742,9 +3736,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
*mbmi = best_mbmode;
x->skip |= best_skip2;
vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], best_zcoeff_blk,
sizeof(best_zcoeff_blk));
for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
if (best_pred_rd[i] == INT64_MAX)
best_pred_diff[i] = INT_MIN;
@ -3838,11 +3829,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
int_mv seg_mvs[4][MAX_REF_FRAMES];
b_mode_info best_bmodes[4];
int best_skip2 = 0;
unsigned char best_zcoeff_blk[256] = { 0 };
x->skip_encode = cpi->sf.skip_encode_frame && xd->q_index < QIDX_SKIP_THRESH;
vp9_zero(x->zcoeff_blk);
vp9_zero(ctx->zcoeff_blk);
for (i = 0; i < 4; i++) {
int j;
@ -4319,8 +4308,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
best_mbmode = *mbmi;
best_skip2 = this_skip2;
vpx_memcpy(best_zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
sizeof(best_zcoeff_blk));
vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
sizeof(ctx->zcoeff_blk));
for (i = 0; i < 4; i++)
best_bmodes[i] = xd->this_mi->bmi[i];
@ -4483,9 +4472,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->mv[1].as_int = xd->this_mi->bmi[3].as_mv[1].as_int;
}
vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], best_zcoeff_blk,
sizeof(best_zcoeff_blk));
for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
if (best_pred_rd[i] == INT64_MAX)
best_pred_diff[i] = INT_MIN;
@ -4527,4 +4513,3 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
return best_rd;
}