Merge "Prunes out full-rd computation based on modeled rd"

Deb Mukherjee 2013-07-10 15:37:11 -07:00 committed by Gerrit Code Review
commit 7494bba66b
4 changed files with 172 additions and 71 deletions
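This commit adds a "modeled RD breakout" to the VP9 encoder's mode search: before running the full transform/quantization rate-distortion computation for a candidate, a cheap model of the prediction error is used to estimate the RD cost, and the candidate is dropped when that estimate is already more than twice the best RD found so far. The sketch below restates that rule in isolation; rdcost() and should_skip_full_rd() are illustrative stand-ins, not libvpx functions, and the real RDCOST macro's fixed-point scaling is not reproduced here.

#include <stdint.h>

/* Illustrative stand-in for VP9's RDCOST(): cost grows with rate (scaled by
 * a rate multiplier) plus distortion.  The real macro uses fixed-point
 * scaling with x->rdmult and x->rddiv. */
static int64_t rdcost(int rdmult, int rate, int64_t dist) {
  return (int64_t)rdmult * rate + dist;
}

/* The pruning rule this commit introduces: if the cheap modeled RD cost is
 * already more than twice the best RD found so far, skip the full RD
 * computation for this candidate. */
static int should_skip_full_rd(int rdmult, int model_rate, int64_t model_dist,
                               int64_t best_rd_so_far) {
  int64_t modeled_rd;
  if (best_rd_so_far == INT64_MAX)  /* no finished candidate to compare with */
    return 0;
  modeled_rd = rdcost(rdmult, model_rate, model_dist);
  return modeled_rd / 2 > best_rd_so_far;  /* same margin as "rd / 2 > best_rd" */
}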

View File

@ -468,8 +468,8 @@ static void setup_block_dptrs(MACROBLOCKD *xd, int ss_x, int ss_y) {
}
#if CONFIG_ALPHA
// TODO(jkoleszar): Using the Y w/h for now
mb->plane[3].subsampling_x = 0;
mb->plane[3].subsampling_y = 0;
xd->plane[3].subsampling_x = 0;
xd->plane[3].subsampling_y = 0;
#endif
}

View File

@ -720,6 +720,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->disable_splitmv = 0;
sf->mode_search_skip_flags = 0;
sf->last_chroma_intra_mode = TM_PRED;
sf->use_rd_breakout = 0;
// Skip any mode not chosen at size < X for all sizes > X
// Hence BLOCK_SIZE_SB64X64 (skip is off)
@ -767,6 +768,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA;
sf->last_chroma_intra_mode = H_PRED;
sf->use_rd_breakout = 1;
}
if (speed == 2) {
sf->adjust_thresholds_by_speed = 1;
@ -790,6 +792,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
FLAG_SKIP_COMP_BESTINTRA |
FLAG_SKIP_COMP_REFMISMATCH;
sf->last_chroma_intra_mode = DC_PRED;
sf->use_rd_breakout = 1;
}
if (speed == 3) {
sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
@ -804,6 +807,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA |
FLAG_SKIP_COMP_REFMISMATCH;
sf->use_rd_breakout = 1;
}
if (speed == 4) {
sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
@ -818,6 +822,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA |
FLAG_SKIP_COMP_REFMISMATCH;
sf->use_rd_breakout = 1;
}
/*
if (speed == 2) {

View File

@ -275,6 +275,7 @@ typedef struct {
// defined in the MODE_SEARCH_SKIP_HEURISTICS enum
unsigned int mode_search_skip_flags;
MB_PREDICTION_MODE last_chroma_intra_mode;
int use_rd_breakout;
} SPEED_FEATURES;
enum BlockSize {
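
The hunks above leave use_rd_breakout at 0 for speed 0 and set it to 1 for speeds 1 through 4, so the new pruning only affects the faster presets. A minimal sketch of how the flag gates the breakout checks in the diff (the struct and helper below are illustrative reductions, not the actual libvpx definitions):

#include <stdint.h>

/* Illustrative reduction of the gating added in this commit; the real
 * SPEED_FEATURES struct carries many more fields. */
typedef struct {
  int use_rd_breakout;  /* left 0 at speed 0, set to 1 at speeds 1 through 4 */
} speed_features_sketch;

static int rd_breakout_enabled(const speed_features_sketch *sf,
                               int64_t ref_best_rd) {
  /* Mirrors the two conditions checked in the diff: the feature is on and
   * there is a finite best RD to compare against. */
  return sf->use_rd_breakout && ref_best_rd < INT64_MAX;
}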

View File

@ -473,6 +473,31 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
*out_dist_sum = dist_sum << 4;
}
static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
MACROBLOCK *x, MACROBLOCKD *xd,
int *out_rate_sum, int64_t *out_dist_sum) {
// Note our transform coeffs are 8 times an orthogonal transform.
// Hence quantizer step is also 8 times. To get effective quantizer
// we need to divide by 8 before sending to modeling function.
struct macroblock_plane *const p = &x->plane[0];
struct macroblockd_plane *const pd = &xd->plane[0];
// TODO(dkovalev) the same code in get_plane_block_size
const int bw = plane_block_width(bsize, pd);
const int bh = plane_block_height(bsize, pd);
const enum BlockSize bs = get_block_size(bw, bh);
unsigned int sse;
int rate;
int64_t dist;
(void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
pd->dst.buf, pd->dst.stride, &sse);
// sse works better than var, since there is no dc prediction used
model_rd_from_var_lapndz(sse, bw * bh, pd->dequant[1] >> 3, &rate, &dist);
*out_rate_sum = rate;
*out_dist_sum = dist << 4;
}
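The comment in model_rd_for_sb_y() notes that the transform coefficients, and therefore the stored dequant values, are 8 times those of an orthogonal transform, so the effective quantizer step handed to the model is dequant >> 3; the modeled distortion is then shifted left by 4, the same rescaling model_rd_for_sb() applies, before it is compared at the usual RD scale. Two hypothetical helpers spelling out just those scalings (names are illustrative):

#include <stdint.h>

static int effective_qstep(int dequant_ac) {
  return dequant_ac >> 3;     /* stored value is 8x the effective step */
}

static int64_t rescale_model_dist(int64_t model_dist) {
  return model_dist << 4;     /* same "dist << 4" scaling as in the diff */
}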
static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
TX_SIZE tx_size,
MACROBLOCK *x, MACROBLOCKD *xd,
@ -1643,14 +1668,16 @@ static int labels2mode(MACROBLOCK *x, int i,
return cost;
}
static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
MACROBLOCK *x,
int64_t best_yrd,
int i,
int *labelyrate,
int64_t *distortion,
ENTROPY_CONTEXT *ta,
ENTROPY_CONTEXT *tl) {
int k;
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
const int bw = plane_block_width(bsize, &xd->plane[0]);
@ -1672,9 +1699,6 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
int64_t thisdistortion = 0;
int thisrate = 0;
*labelyrate = 0;
*distortion = 0;
vp9_build_inter_predictor(pre,
xd->plane[0].pre[0].stride,
dst,
@ -1684,9 +1708,6 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
bw, bh, 0 /* no avg */, &xd->subpix,
MV_PRECISION_Q3);
// TODO(debargha): Make this work properly with the
// implicit-compoundinter-weight experiment when implicit
// weighting for splitmv modes is turned on.
if (xd->mode_info_context->mbmi.ref_frame[1] > 0) {
uint8_t* const second_pre =
raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
@ -1699,10 +1720,28 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
&xd->subpix, MV_PRECISION_Q3);
}
// Turning this section off for now since it hurts quality and does not
// improve speed much
/*
if (cpi->sf.use_rd_breakout &&
best_yrd < INT64_MAX) {
int64_t thisrd;
model_rd_for_sb_y(cpi, bsize, x, xd, &thisrate, &thisdistortion);
thisrd = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion);
if (thisrd / 2 > best_yrd) {
*distortion = thisdistortion;
*labelyrate = thisrate;
return thisrd;
}
}
*/
vp9_subtract_block(bh, bw, src_diff, 8,
src, src_stride,
dst, xd->plane[0].dst.stride);
*labelyrate = 0;
*distortion = 0;
k = i;
for (idy = 0; idy < bh / 4; ++idy) {
for (idx = 0; idx < bw / 4; ++idx) {
@ -1788,7 +1827,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
MB_PREDICTION_MODE this_mode;
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
const int label_count = 4;
int64_t this_segment_rd = 0, other_segment_rd;
int64_t this_segment_rd = 0;
int label_mv_thresh;
int segmentyrate = 0;
int best_eobs[4] = { 0 };
@ -1811,15 +1850,13 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
label_mv_thresh = 1 * bsi->mvthresh / label_count;
// Segmentation method overheads
other_segment_rd = this_segment_rd;
for (idy = 0; idy < 2; idy += bh) {
for (idx = 0; idx < 2; idx += bw) {
// TODO(jingning,rbultje): rewrite the rate-distortion optimization
// loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT];
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX;
int64_t best_label_rd = INT64_MAX;
MB_PREDICTION_MODE mode_selected = ZEROMV;
int bestlabelyrate = 0;
i = idy * 2 + idx;
@ -1960,8 +1997,9 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
mv_check_bounds(x, &second_mode_mv[this_mode]))
continue;
this_rd = encode_inter_mb_segment(&cpi->common,
x, i, &labelyrate,
this_rd = encode_inter_mb_segment(cpi, x,
bsi->segment_rd - this_segment_rd,
i, &labelyrate,
&distortion, t_above_s, t_left_s);
this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
rate += labelyrate;
@ -1990,7 +2028,11 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
bd += sbd;
segmentyrate += bestlabelyrate;
this_segment_rd += best_label_rd;
other_segment_rd += best_other_rd;
if (this_segment_rd > bsi->segment_rd) {
bsi->segment_rd = INT64_MAX;
return;
}
for (j = 1; j < bh; ++j)
vpx_memcpy(&x->partition_info->bmi[i + j * 2],
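The loop above accumulates each label's best RD into this_segment_rd and, with this change, abandons the whole segmentation as soon as the running total exceeds bsi->segment_rd, leaving INT64_MAX behind as a "pruned, no valid result" sentinel for the callers to test. A self-contained sketch of that accumulate-and-bail pattern (names and the four-label shape are illustrative):

#include <stdint.h>

/* Accumulate per-label RD costs and abandon the segmentation early once the
 * running total can no longer beat the best complete segmentation so far.
 * Returns INT64_MAX as a sentinel meaning "pruned, no valid result". */
static int64_t segment_rd_with_breakout(const int64_t label_rd[4],
                                        int64_t best_segment_rd) {
  int64_t total = 0;
  for (int i = 0; i < 4; ++i) {
    total += label_rd[i];
    if (total > best_segment_rd)
      return INT64_MAX;   /* caller treats this as "skip this candidate" */
  }
  return total;
}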
@ -2003,33 +2045,31 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
}
} /* for each label */
if (this_segment_rd < bsi->segment_rd) {
bsi->r = br;
bsi->d = bd;
bsi->segment_yrate = segmentyrate;
bsi->segment_rd = this_segment_rd;
bsi->r = br;
bsi->d = bd;
bsi->segment_yrate = segmentyrate;
bsi->segment_rd = this_segment_rd;
// store everything needed to come back to this!!
for (i = 0; i < 4; i++) {
bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
if (mbmi->ref_frame[1] > 0)
bsi->second_mvs[i].as_mv = x->partition_info->bmi[i].second_mv.as_mv;
bsi->modes[i] = x->partition_info->bmi[i].mode;
bsi->eobs[i] = best_eobs[i];
}
// store everything needed to come back to this!!
for (i = 0; i < 4; i++) {
bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
if (mbmi->ref_frame[1] > 0)
bsi->second_mvs[i].as_mv = x->partition_info->bmi[i].second_mv.as_mv;
bsi->modes[i] = x->partition_info->bmi[i].mode;
bsi->eobs[i] = best_eobs[i];
}
}
static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
int_mv *best_ref_mv,
int_mv *second_best_ref_mv,
int64_t best_rd,
int *returntotrate,
int *returnyrate,
int64_t *returndistortion,
int *skippable, int mvthresh,
int_mv seg_mvs[4][MAX_REF_FRAMES],
int mi_row, int mi_col) {
static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
int_mv *best_ref_mv,
int_mv *second_best_ref_mv,
int64_t best_rd,
int *returntotrate,
int *returnyrate,
int64_t *returndistortion,
int *skippable, int mvthresh,
int_mv seg_mvs[4][MAX_REF_FRAMES],
int mi_row, int mi_col) {
int i;
BEST_SEG_INFO bsi;
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
@ -2078,7 +2118,7 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
*skippable = vp9_sby_is_skippable(&x->e_mbd, BLOCK_SIZE_SB8X8);
mbmi->mode = bsi.modes[3];
return (int)(bsi.segment_rd);
return bsi.segment_rd;
}
static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
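rd_pick_best_mbsegmentation() now returns bsi.segment_rd as an int64_t instead of casting it to int. Since the early exit above can leave INT64_MAX in bsi.segment_rd, the old (int) cast would have mangled that sentinel (and any RD value too large for an int) before the callers' new tmp_rd == INT64_MAX checks could see it. A tiny illustration of what the removed cast does to the sentinel, assuming the common 32-bit int and 64-bit int64_t configuration:

#include <stdint.h>
#include <stdio.h>

int main(void) {
  const int64_t sentinel = INT64_MAX;
  /* Truncating the 64-bit sentinel to a 32-bit int loses the information
   * that the candidate was pruned; the comparison against INT64_MAX in the
   * caller would then never fire.  The conversion itself is
   * implementation-defined when the value does not fit. */
  const int truncated = (int)sentinel;
  printf("as int64_t: %lld\n", (long long)sentinel);
  printf("after (int) cast: %d\n", truncated);   /* typically -1 */
  return 0;
}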
@ -2585,6 +2625,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int best_needs_copy = 0;
uint8_t *orig_dst[MAX_MB_PLANE];
int orig_dst_stride[MAX_MB_PLANE];
int rs = 0;
switch (this_mode) {
int rate_mv;
@ -2658,6 +2699,14 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
*rate2 += cost_mv_ref(cpi, this_mode,
mbmi->mb_mode_context[mbmi->ref_frame[0]]);
if (!(*mode_excluded)) {
if (is_comp_pred) {
*mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
} else {
*mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
}
}
pred_exists = 0;
interpolating_intpel_seen = 0;
// Are all MVs integer pel for Y and UV
@ -2668,6 +2717,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
(mbmi->mv[1].as_mv.col & 15) == 0;
// Search for best switchable filter by checking the variance of
// pred error irrespective of whether the filter will be used
*best_filter = EIGHTTAP;
if (cpi->sf.use_8tap_always) {
*best_filter = EIGHTTAP;
vp9_zero(cpi->rd_filter_cache);
@ -2678,7 +2728,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX;
for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
int rs, j;
int j;
int64_t rs_rd;
const INTERPOLATIONFILTERTYPE filter = vp9_switchable_interp[i];
const int is_intpel_interp = intpel_mv &&
@ -2730,6 +2780,15 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
tmp_dist_sum = dist_sum;
}
}
if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
if (rd / 2 > ref_best_rd) {
for (i = 0; i < MAX_MB_PLANE; i++) {
xd->plane[i].dst.buf = orig_dst[i];
xd->plane[i].dst.stride = orig_dst_stride[i];
}
return INT64_MAX;
}
}
newbest = i == 0 || rd < best_rd;
if (newbest) {
@ -2753,11 +2812,11 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
xd->plane[i].dst.stride = orig_dst_stride[i];
}
}
// Set the appropriate filter
mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ?
cm->mcomp_filter_type : *best_filter;
vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
rs = (cm->mcomp_filter_type == SWITCHABLE ? get_switchable_rate(cm, x) : 0);
if (pred_exists) {
if (best_needs_copy) {
@ -2773,6 +2832,23 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
}
if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
int tmp_rate;
int64_t tmp_dist;
model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
// if current pred_error modeled rd is substantially more than the best
// so far, do not bother doing full rd
if (rd / 2 > ref_best_rd) {
for (i = 0; i < MAX_MB_PLANE; i++) {
xd->plane[i].dst.buf = orig_dst[i];
xd->plane[i].dst.stride = orig_dst_stride[i];
}
return INT64_MAX;
}
}
if (cpi->common.mcomp_filter_type == SWITCHABLE)
*rate2 += get_switchable_rate(cm, x);
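Both new breakouts in handle_inter_mode() return INT64_MAX only after restoring xd->plane[i].dst.buf and .stride from orig_dst / orig_dst_stride, because the surrounding filter search may have redirected the destination buffers to temporary storage. A compact sketch of that save, speculate, restore-on-early-return pattern (types and names are illustrative):

#include <stdint.h>

#define MAX_PLANES 3

struct plane_dst {
  uint8_t *buf;
  int stride;
};

/* Try a candidate prediction into scratch buffers; if the modeled RD says it
 * cannot beat best_rd, restore the original destination pointers and signal
 * the caller to skip the full RD path. */
static int64_t try_candidate(struct plane_dst dst[MAX_PLANES],
                             struct plane_dst scratch[MAX_PLANES],
                             int64_t modeled_rd, int64_t best_rd) {
  struct plane_dst saved[MAX_PLANES];
  for (int i = 0; i < MAX_PLANES; ++i) {
    saved[i] = dst[i];        /* remember the real destination */
    dst[i] = scratch[i];      /* predict into scratch for the trial */
  }

  if (best_rd != INT64_MAX && modeled_rd / 2 > best_rd) {
    for (int i = 0; i < MAX_PLANES; ++i)
      dst[i] = saved[i];      /* undo the redirection before bailing */
    return INT64_MAX;         /* pruned */
  }

  /* ... the full RD computation would run here ... */
  for (int i = 0; i < MAX_PLANES; ++i)
    dst[i] = saved[i];
  return modeled_rd;
}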
@ -2817,7 +2893,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
*distortion = sse + sse2;
*rate2 = 500;
// for best_yrd calculation
// for best yrd calculation
*rate_uv = 0;
*distortion_uv = sse2;
@ -2858,14 +2934,6 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
*skippable = skippable_y && skippable_uv;
}
if (!(*mode_excluded)) {
if (is_comp_pred) {
*mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
} else {
*mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
}
}
for (i = 0; i < MAX_MB_PLANE; i++) {
xd->plane[i].dst.buf = orig_dst[i];
xd->plane[i].dst.stride = orig_dst_stride[i];
@ -2962,6 +3030,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
cpi->gld_fb_idx,
cpi->alt_fb_idx};
int64_t best_rd = INT64_MAX;
int64_t best_yrd = INT64_MAX;
int64_t best_txfm_rd[NB_TXFM_MODES];
int64_t best_txfm_diff[NB_TXFM_MODES];
int64_t best_pred_diff[NB_PREDICTION_TYPES];
@ -3357,16 +3426,20 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int newbest, rs;
int64_t rs_rd;
mbmi->interp_filter =
vp9_switchable_interp[switchable_filter_index];
vp9_switchable_interp[switchable_filter_index];
vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
&mbmi->ref_mvs[mbmi->ref_frame[0]][0],
second_ref, INT64_MAX,
second_ref,
best_yrd,
&rate, &rate_y, &distortion,
&skippable,
(int)this_rd_thresh, seg_mvs,
mi_row, mi_col);
if (tmp_rd == INT64_MAX) {
continue;
}
cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
rs = get_switchable_rate(cm, x);
rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
@ -3374,6 +3447,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS], tmp_rd + rs_rd);
if (cm->mcomp_filter_type == SWITCHABLE)
tmp_rd += rs_rd;
newbest = (tmp_rd < tmp_best_rd);
if (newbest) {
tmp_best_filter = mbmi->interp_filter;
@ -3392,8 +3466,21 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
for (i = 0; i < 4; i++)
tmp_best_bmodes[i] = xd->mode_info_context->bmi[i];
pred_exists = 1;
if (switchable_filter_index == 0 &&
cpi->sf.use_rd_breakout &&
best_rd < INT64_MAX) {
if (tmp_best_rdu / 2 > best_rd) {
// skip searching the other filters if the first is
// already substantially larger than the best so far
tmp_best_filter = mbmi->interp_filter;
tmp_best_rdu = INT64_MAX;
break;
}
}
}
} // switchable_filter_index loop
if (tmp_best_rdu == INT64_MAX)
continue;
mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ?
tmp_best_filter : cm->mcomp_filter_type);
@ -3403,11 +3490,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// switchable list (bilinear, 6-tap) is indicated at the frame level
tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
&mbmi->ref_mvs[mbmi->ref_frame[0]][0],
second_ref, INT64_MAX,
second_ref,
best_yrd,
&rate, &rate_y, &distortion,
&skippable,
(int)this_rd_thresh, seg_mvs,
mi_row, mi_col);
if (tmp_rd == INT64_MAX)
continue;
} else {
if (cpi->common.mcomp_filter_type == SWITCHABLE) {
int rs = get_switchable_rate(cm, x);
@ -3430,29 +3520,32 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (cpi->common.mcomp_filter_type == SWITCHABLE)
rate2 += get_switchable_rate(cm, x);
// If even the 'Y' rd value of split is higher than best so far
// then dont bother looking at UV
vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
BLOCK_SIZE_SB8X8);
vp9_subtract_sbuv(x, BLOCK_SIZE_SB8X8);
super_block_uvrd_for_txfm(cm, x, &rate_uv, &distortion_uv,
&uv_skippable, NULL, BLOCK_SIZE_SB8X8, TX_4X4);
rate2 += rate_uv;
distortion2 += distortion_uv;
skippable = skippable && uv_skippable;
txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
for (i = 0; i < NB_TXFM_MODES; ++i)
txfm_cache[i] = txfm_cache[ONLY_4X4];
if (!mode_excluded) {
if (is_comp_pred)
mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
else
mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY;
}
compmode_cost = vp9_cost_bit(comp_mode_p, is_comp_pred);
if (RDCOST(x->rdmult, x->rddiv, rate2, distortion2) <
best_rd) {
// If even the 'Y' rd value of split is higher than best so far
// then dont bother looking at UV
vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
BLOCK_SIZE_SB8X8);
vp9_subtract_sbuv(x, BLOCK_SIZE_SB8X8);
super_block_uvrd_for_txfm(cm, x, &rate_uv, &distortion_uv,
&uv_skippable, NULL,
BLOCK_SIZE_SB8X8, TX_4X4);
rate2 += rate_uv;
distortion2 += distortion_uv;
skippable = skippable && uv_skippable;
txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
for (i = 0; i < NB_TXFM_MODES; ++i)
txfm_cache[i] = txfm_cache[ONLY_4X4];
}
} else {
compmode_cost = vp9_cost_bit(comp_mode_p,
mbmi->ref_frame[1] > INTRA_FRAME);
@ -3494,7 +3587,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (skippable && bsize >= BLOCK_SIZE_SB8X8) {
// Back out the coefficient coding costs
rate2 -= (rate_y + rate_uv);
// for best_yrd calculation
// for best yrd calculation
rate_uv = 0;
if (mb_skip_allowed) {
@ -3592,6 +3685,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
*returnrate = rate2;
*returndistortion = distortion2;
best_rd = this_rd;
best_yrd = best_rd -
RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
best_mbmode = *mbmi;
best_skip2 = this_skip2;
best_partition = *x->partition_info;
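best_yrd above is the best total RD minus the RDCOST of the chroma rate and distortion, i.e. a luma-only budget. It is passed into rd_pick_best_mbsegmentation() (replacing the old INT64_MAX argument) so split-mv candidates whose Y-only RD already exceeds the budget can be pruned before their UV cost is even computed. A small sketch of that budget computation, with rdcost() again standing in for the RDCOST macro:

#include <stdint.h>

/* Illustrative stand-in for RDCOST(): lambda * rate + distortion. */
static int64_t rdcost(int rdmult, int rate, int64_t dist) {
  return (int64_t)rdmult * rate + dist;
}

/* Luma-only RD budget: the best total RD minus the chroma contribution.
 * A split-mv candidate whose Y-only RD already exceeds this budget cannot
 * become the new best mode, so its search can be cut short. */
static int64_t luma_rd_budget(int rdmult, int64_t best_rd,
                              int rate_uv, int64_t dist_uv) {
  if (best_rd == INT64_MAX)
    return INT64_MAX;         /* no best mode yet: nothing to prune against */
  return best_rd - rdcost(rdmult, rate_uv, dist_uv);
}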