From b72ecbb1b98be5cb24e6f68d8d2eeda6efc38cc0 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Fri, 19 Jul 2013 09:17:00 -0700 Subject: [PATCH] Add best_rd breakout in intra4x4 RD loop. Encoding time of first 50 frames of bus (speed 0) @ 1500kbps goes from 1min5.4 to 1min4.0, i.e. 2.2% faster overall. Change-Id: I8c32f2aff9a649ce7dd49d910dc5ba16b99c3bc6 --- vp9/encoder/vp9_rdopt.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index dcb09d0ff..de2a78762 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1178,10 +1178,11 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, int64_t *bestdistortion, - BLOCK_SIZE_TYPE bsize) { + BLOCK_SIZE_TYPE bsize, + int64_t rd_thresh) { MB_PREDICTION_MODE mode; MACROBLOCKD *xd = &x->e_mbd; - int64_t best_rd = INT64_MAX; + int64_t best_rd = rd_thresh; int rate = 0; int64_t distortion; VP9_COMMON *const cm = &cpi->common; @@ -1262,6 +1263,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block, 16), 16, &ssz) >> 2; + if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) + goto next; if (tx_type != DCT_DCT) vp9_short_iht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block, 16), @@ -1284,6 +1287,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, best_tx_type = tx_type; vpx_memcpy(a, tempa, sizeof(tempa)); vpx_memcpy(l, templ, sizeof(templ)); + // FIXME(rbultje) why are we storing best_dqcoeff instead of the + // dst buffer here? for (idy = 0; idy < num_4x4_blocks_high; ++idy) { for (idx = 0; idx < num_4x4_blocks_wide; ++idx) { block = ib + idy * 2 + idx; @@ -1293,9 +1298,11 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, } } } + next: + {} } - if (x->skip_encode) + if (best_rd >= rd_thresh || x->skip_encode) return best_rd; for (idy = 0; idy < num_4x4_blocks_high; ++idy) { @@ -1352,7 +1359,7 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, const int mis = xd->mode_info_stride; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode); int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry); - int64_t UNINITIALIZED_IS_SAFE(d); + int64_t UNINITIALIZED_IS_SAFE(d), this_rd; i = idy * 2 + idx; if (cpi->common.frame_type == KEY_FRAME) { @@ -1363,9 +1370,14 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, bmode_costs = mb->y_mode_costs[A][L]; } - total_rd += rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs, - t_above + idx, t_left + idy, - &r, &ry, &d, bsize); + this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs, + t_above + idx, t_left + idy, + &r, &ry, &d, bsize, + best_rd - total_rd); + if (this_rd >= best_rd - total_rd) + return INT64_MAX; + + total_rd += this_rd; cost += r; distortion += d; tot_rate_y += ry; @@ -3511,8 +3523,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, */ mbmi->txfm_size = TX_4X4; - rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, - &distortion_y, INT64_MAX); + if (rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, + &distortion_y, best_rd) >= best_rd) + continue; rate2 += rate; rate2 += intra_cost_penalty; distortion2 += distortion_y;