vp9: Encoding cycle reduction for speed 8.
1. Skip golden non-zeromv and newmv-last for bsize >= 16x16 if the temporal variance obtained from choose_partitioning is very low. 2. Skip horz and vert INTRA modes for speed 8. This change works best on clips with little noise and some motion (e.g. gips_motion, which gets a > 5% speedup). PSNR drop is 1.78% on the rtc test set; no obvious visual quality regression was found. Change-Id: Ib43b5b20e67809d03c5a6890818ddff59e1fc94a
This commit is contained in:
parent
181988d372
commit
f9c0587200
@ -146,9 +146,9 @@ struct macroblock {
|
||||
uint8_t sb_is_skin;
|
||||
|
||||
// Used to save the status of whether a block has a low variance in
|
||||
// choose_partitioning. 0 for 64x64, 1 2 for 64x32, 3 4 for 32x64, 5~8 for
|
||||
// 32x32.
|
||||
uint8_t variance_low[9];
|
||||
// choose_partitioning. 0 for 64x64, 1~2 for 64x32, 3~4 for 32x64, 5~8 for
|
||||
// 32x32, 9~24 for 16x16.
|
||||
uint8_t variance_low[25];
|
||||
|
||||
void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride);
|
||||
void (*itxm_add)(const tran_low_t *input, uint8_t *dest, int stride, int eob);
|
||||
|
@ -773,7 +773,7 @@ static int choose_partitioning(VP9_COMP *cpi,
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 9; i++) {
|
||||
for (i = 0; i < 25; i++) {
|
||||
x->variance_low[i] = 0;
|
||||
}
|
||||
|
||||
@ -1083,28 +1083,53 @@ static int choose_partitioning(VP9_COMP *cpi,
|
||||
}
|
||||
|
||||
if (cpi->sf.short_circuit_low_temp_var) {
|
||||
// Set low variance flag, only for blocks >= 32x32 and if LAST_FRAME was
|
||||
// selected.
|
||||
if (ref_frame_partition == LAST_FRAME) {
|
||||
int mv_thr = cm->width > 640 ? 8 : 4;
|
||||
// Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected
|
||||
// and int_pro mv is small. If the temporal variance is small set the
|
||||
// variance_low flag for the block. The variance threshold can be adjusted,
|
||||
// the higher the more aggressive.
|
||||
if (ref_frame_partition == LAST_FRAME &&
|
||||
(cpi->sf.short_circuit_low_temp_var == 1 ||
|
||||
(xd->mi[0]->mv[0].as_mv.col < mv_thr &&
|
||||
xd->mi[0]->mv[0].as_mv.col > -mv_thr &&
|
||||
xd->mi[0]->mv[0].as_mv.row < mv_thr &&
|
||||
xd->mi[0]->mv[0].as_mv.row > -mv_thr))) {
|
||||
if (xd->mi[0]->sb_type == BLOCK_64X64 &&
|
||||
vt.part_variances.none.variance < (thresholds[0] >> 1)) {
|
||||
x->variance_low[0] = 1;
|
||||
} else if (xd->mi[0]->sb_type == BLOCK_64X32) {
|
||||
if (vt.part_variances.horz[0].variance < (thresholds[0] >> 2))
|
||||
x->variance_low[1] = 1;
|
||||
if (vt.part_variances.horz[1].variance < (thresholds[0] >> 2))
|
||||
x->variance_low[2] = 1;
|
||||
for (j = 0; j < 2; j++) {
|
||||
if (vt.part_variances.horz[j].variance < (thresholds[0] >> 2))
|
||||
x->variance_low[j + 1] = 1;
|
||||
}
|
||||
} else if (xd->mi[0]->sb_type == BLOCK_32X64) {
|
||||
if (vt.part_variances.vert[0].variance < (thresholds[0] >> 2))
|
||||
x->variance_low[3] = 1;
|
||||
if (vt.part_variances.vert[1].variance < (thresholds[0] >> 2))
|
||||
x->variance_low[4] = 1;
|
||||
for (j = 0; j < 2; j++) {
|
||||
if (vt.part_variances.vert[j].variance < (thresholds[0] >> 2))
|
||||
x->variance_low[j + 3] = 1;
|
||||
}
|
||||
} else {
|
||||
// 32x32
|
||||
for (i = 0; i < 4; i++) {
|
||||
if (!force_split[i + 1] &&
|
||||
vt.split[i].part_variances.none.variance < (thresholds[1] >> 1))
|
||||
x->variance_low[i + 5] = 1;
|
||||
if (!force_split[i + 1]) {
|
||||
// 32x32
|
||||
if (vt.split[i].part_variances.none.variance <
|
||||
(thresholds[1] >> 1))
|
||||
x->variance_low[i + 5] = 1;
|
||||
} else if (cpi->sf.short_circuit_low_temp_var == 2) {
|
||||
int idx[4] = {0, 4, xd->mi_stride << 2, (xd->mi_stride << 2) + 4};
|
||||
const int idx_str = cm->mi_stride * mi_row + mi_col + idx[i];
|
||||
MODE_INFO **this_mi = cm->mi_grid_visible + idx_str;
|
||||
// For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
|
||||
// inside.
|
||||
if ((*this_mi)->sb_type == BLOCK_16X16 ||
|
||||
(*this_mi)->sb_type == BLOCK_32X16 ||
|
||||
(*this_mi)->sb_type == BLOCK_16X32) {
|
||||
for (j = 0; j < 4; j++) {
|
||||
if (vt.split[i].split[j].part_variances.none.variance <
|
||||
(thresholds[2] >> 8))
|
||||
x->variance_low[(i << 2) + j + 9] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -40,6 +40,14 @@ typedef struct {
|
||||
int in_use;
|
||||
} PRED_BUFFER;
|
||||
|
||||
|
||||
// Lookup table giving the variance_low[] index (9~24) of a 16x16 block.
// It is indexed as pos_shift_16x16[i][j], where the lookup code uses
// i = (mi_row & 0x7) >> 1 and j = (mi_col & 0x7) >> 1.
// Each 2x2 group of entries holds four consecutive indices (9~12, 13~16,
// 17~20, 21~24), matching the (i << 2) + j + 9 numbering used when the flags
// are set in choose_partitioning.
static const int pos_shift_16x16[4][4] = {
|
||||
{9, 10, 13, 14},
|
||||
{11, 12, 15, 16},
|
||||
{17, 18, 21, 22},
|
||||
{19, 20, 23, 24}
|
||||
};
|
||||
|
||||
static int mv_refs_rt(VP9_COMP *cpi, const VP9_COMMON *cm,
|
||||
const MACROBLOCK *x,
|
||||
const MACROBLOCKD *xd,
|
||||
@ -1274,6 +1282,8 @@ static INLINE int set_force_skip_low_temp_var(uint8_t *variance_low,
|
||||
int mi_row, int mi_col,
|
||||
BLOCK_SIZE bsize) {
|
||||
int force_skip_low_temp_var = 0;
|
||||
int i = (mi_row & 0x7) >> 1;
|
||||
int j = (mi_col & 0x7) >> 1;
|
||||
// Set force_skip_low_temp_var based on the block size and block offset.
|
||||
if (bsize == BLOCK_64X64) {
|
||||
force_skip_low_temp_var = variance_low[0];
|
||||
@ -1299,6 +1309,19 @@ static INLINE int set_force_skip_low_temp_var(uint8_t *variance_low,
|
||||
} else if ((mi_col & 0x7) && (mi_row & 0x7)) {
|
||||
force_skip_low_temp_var = variance_low[8];
|
||||
}
|
||||
} else if (bsize == BLOCK_16X16) {
|
||||
force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]];
|
||||
} else if (bsize == BLOCK_32X16) {
|
||||
// The col shift index for the second 16x16 block.
|
||||
int j2 = ((mi_col + 2) & 0x7) >> 1;
|
||||
// Only if each 16x16 block inside has low temporal variance.
|
||||
force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]] &&
|
||||
variance_low[pos_shift_16x16[i][j2]];
|
||||
} else if (bsize == BLOCK_16X32) {
|
||||
// The row shift index for the second 16x16 block.
|
||||
int i2 = ((mi_row + 2) & 0x7) >> 1;
|
||||
force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]] &&
|
||||
variance_low[pos_shift_16x16[i2][j]];
|
||||
}
|
||||
return force_skip_low_temp_var;
|
||||
}
|
||||
@ -1503,6 +1526,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
continue;
|
||||
}
|
||||
|
||||
if (cpi->sf.short_circuit_low_temp_var == 2 &&
|
||||
force_skip_low_temp_var && ref_frame == LAST_FRAME &&
|
||||
this_mode == NEWMV) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (cpi->use_svc) {
|
||||
if (svc_force_zero_mode[ref_frame - 1] &&
|
||||
frame_mv[this_mode][ref_frame].as_int != 0)
|
||||
@ -1842,8 +1871,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh;
|
||||
}
|
||||
// Perform intra prediction search, if the best SAD is above a certain
|
||||
// threshold. Skip intra prediction if force_skip_low_temp_var is set.
|
||||
if (!force_skip_low_temp_var && perform_intra_pred &&
|
||||
// threshold.
|
||||
if ((!force_skip_low_temp_var || bsize < BLOCK_32X32) &&
|
||||
perform_intra_pred &&
|
||||
(best_rdc.rdcost == INT64_MAX ||
|
||||
(!x->skip && best_rdc.rdcost > inter_mode_thresh &&
|
||||
bsize <= cpi->sf.max_intra_bsize))) {
|
||||
|
@ -429,7 +429,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
|
||||
sf->mv.search_method = NSTEP;
|
||||
sf->mv.reduce_first_step_size = 1;
|
||||
sf->skip_encode_sb = 0;
|
||||
if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR && cpi->oxcf.pass == 0 &&
|
||||
if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR &&
|
||||
content != VP9E_CONTENT_SCREEN) {
|
||||
// Enable short circuit for low temporal variance.
|
||||
sf->short_circuit_low_temp_var = 1;
|
||||
@ -450,6 +450,17 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
|
||||
sf->adaptive_rd_thresh = 4;
|
||||
sf->mv.subpel_force_stop = (content == VP9E_CONTENT_SCREEN) ? 3 : 2;
|
||||
sf->lpf_pick = LPF_PICK_MINIMAL_LPF;
|
||||
// Only keep INTRA_DC mode for speed 8.
|
||||
if (!is_keyframe) {
|
||||
int i = 0;
|
||||
for (i = 0; i < BLOCK_SIZES; ++i)
|
||||
sf->intra_y_mode_bsize_mask[i] = INTRA_DC;
|
||||
}
|
||||
if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR &&
|
||||
content != VP9E_CONTENT_SCREEN) {
|
||||
// More aggressive short circuit for speed 8.
|
||||
sf->short_circuit_low_temp_var = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -449,6 +449,10 @@ typedef struct SPEED_FEATURES {
|
||||
|
||||
// Skip a number of expensive mode evaluations for blocks with very low
|
||||
// temporal variance.
|
||||
// 1: Skip golden non-zeromv and ALL INTRA for bsize >= 32x32.
|
||||
// 2: Skip golden non-zeromv and newmv-last for bsize >= 16x16, skip ALL
|
||||
// INTRA for bsize >= 32x32 and vert/horz INTRA for bsize 16x16, 16x32 and
|
||||
// 32x16.
|
||||
int short_circuit_low_temp_var;
|
||||
} SPEED_FEATURES;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user