Apply machine learning-based early termination in VP9 partition search
This patch was based on Yang Xian's intern project code. Further modifications were done. 1. Moved machine-learning related parameters into the context structure. 2. Corrected the calculation of sum_eobs. 3. Removed unused parameters and calculations. 4. Made it work with multiple tiles. 5. Added a speed feature for the machine-learning based partition search early termination. 6. Re-organized the code. The patch was rebased to the top-of-tree. Borg test BDRATE result: 4k set: PSNR: +0.144%; SSIM: +0.043%; hdres set: PSNR: +0.149%; SSIM: +0.269%; midres set: PSNR: +0.127%; SSIM: +0.257%; Average speed gain result: 4k clips: 22%; hd clips: 23%; midres clips: 15%. Change-Id: I0220e93a8277e6a7ea4b2c34b605966e3b1584ac
This commit is contained in:
parent
42a1b310e1
commit
670101439f
@ -71,6 +71,9 @@ typedef struct {
|
||||
// search loop
|
||||
MV pred_mv[MAX_REF_FRAMES];
|
||||
INTERP_FILTER pred_interp_filter;
|
||||
|
||||
// Used for the machine learning-based early termination
|
||||
int sum_eobs;
|
||||
} PICK_MODE_CONTEXT;
|
||||
|
||||
typedef struct PC_TREE {
|
||||
|
@ -52,6 +52,33 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
|
||||
int output_enabled, int mi_row, int mi_col,
|
||||
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx);
|
||||
|
||||
// Machine learning-based early termination parameters.
|
||||
static const double train_mean[24] = {
|
||||
303501.697372, 3042630.372158, 24.694696, 1.392182,
|
||||
689.413511, 162.027012, 1.478213, 0.0,
|
||||
135382.260230, 912738.513263, 28.845217, 1.515230,
|
||||
544.158492, 131.807995, 1.436863, 0.0,
|
||||
43682.377587, 208131.711766, 28.084737, 1.356677,
|
||||
138.254122, 119.522553, 1.252322, 0.0
|
||||
};
|
||||
|
||||
static const double train_stdm[24] = {
|
||||
673689.212982, 5996652.516628, 0.024449, 1.989792,
|
||||
985.880847, 0.014638, 2.001898, 0.0,
|
||||
208798.775332, 1812548.443284, 0.018693, 1.838009,
|
||||
396.986910, 0.015657, 1.332541, 0.0,
|
||||
55888.847031, 448587.962714, 0.017900, 1.904776,
|
||||
98.652832, 0.016598, 1.320992, 0.0
|
||||
};
|
||||
|
||||
// Error tolerance: 0.01%-0.0.05%-0.1%
|
||||
static const double classifiers[24] = {
|
||||
0.111736, 0.289977, 0.042219, 0.204765, 0.120410, -0.143863,
|
||||
0.282376, 0.847811, 0.637161, 0.131570, 0.018636, 0.202134,
|
||||
0.112797, 0.028162, 0.182450, 1.124367, 0.386133, 0.083700,
|
||||
0.050028, 0.150873, 0.061119, 0.109318, 0.127255, 0.625211
|
||||
};
|
||||
|
||||
// This is used as a reference when computing the source variance for the
|
||||
// purpose of activity masking.
|
||||
// Eventually this should be replaced by custom no-reference routines,
|
||||
@ -2684,6 +2711,18 @@ static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv,
|
||||
}
|
||||
#endif
|
||||
|
||||
// Accumulate all tx blocks' eobs results got from the partition evaluation.
|
||||
static void accumulate_eobs(int plane, int block, int row, int col,
|
||||
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
|
||||
void *arg) {
|
||||
PICK_MODE_CONTEXT *ctx = (PICK_MODE_CONTEXT *)arg;
|
||||
(void)row;
|
||||
(void)col;
|
||||
(void)plane_bsize;
|
||||
(void)tx_size;
|
||||
ctx->sum_eobs += ctx->eobs_pbuf[plane][1][block];
|
||||
}
|
||||
|
||||
// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
|
||||
// unlikely to be selected depending on previous rate-distortion optimization
|
||||
// results, for encoding speed-up.
|
||||
@ -2863,15 +2902,92 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
|
||||
best_rdc = this_rdc;
|
||||
if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
|
||||
|
||||
// If all y, u, v transform blocks in this partition are skippable, and
|
||||
// the dist & rate are within the thresholds, the partition search is
|
||||
// terminated for current branch of the partition search tree.
|
||||
if (!x->e_mbd.lossless && ctx->skippable &&
|
||||
((best_rdc.dist < (dist_breakout_thr >> 2)) ||
|
||||
(best_rdc.dist < dist_breakout_thr &&
|
||||
best_rdc.rate < rate_breakout_thr))) {
|
||||
do_split = 0;
|
||||
do_rect = 0;
|
||||
if (!cpi->sf.ml_partition_search_early_termination) {
|
||||
// If all y, u, v transform blocks in this partition are skippable,
|
||||
// and the dist & rate are within the thresholds, the partition search
|
||||
// is terminated for current branch of the partition search tree.
|
||||
if (!x->e_mbd.lossless && ctx->skippable &&
|
||||
((best_rdc.dist < (dist_breakout_thr >> 2)) ||
|
||||
(best_rdc.dist < dist_breakout_thr &&
|
||||
best_rdc.rate < rate_breakout_thr))) {
|
||||
do_split = 0;
|
||||
do_rect = 0;
|
||||
}
|
||||
} else {
|
||||
// Currently, the machine-learning based partition search early
|
||||
// termination is only used while bsize is 16x16, 32x32 or 64x64,
|
||||
// VPXMIN(cm->width, cm->height) >= 480, and speed = 0.
|
||||
if (ctx->mic.mode >= INTRA_MODES && bsize >= BLOCK_16X16) {
|
||||
const double *clf;
|
||||
const double *mean;
|
||||
const double *sd;
|
||||
const int mag_mv =
|
||||
abs(ctx->mic.mv[0].as_mv.col) + abs(ctx->mic.mv[0].as_mv.row);
|
||||
const int left_in_image = !!xd->left_mi;
|
||||
const int above_in_image = !!xd->above_mi;
|
||||
MODE_INFO **prev_mi =
|
||||
&cm->prev_mi_grid_visible[mi_col + cm->mi_stride * mi_row];
|
||||
int above_par = 0; // above_partitioning
|
||||
int left_par = 0; // left_partitioning
|
||||
int last_par = 0; // last_partitioning
|
||||
BLOCK_SIZE context_size;
|
||||
double score;
|
||||
int offset = 0;
|
||||
|
||||
assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]);
|
||||
|
||||
ctx->sum_eobs = 0;
|
||||
vp9_foreach_transformed_block_in_plane(xd, bsize, 0,
|
||||
accumulate_eobs, ctx);
|
||||
|
||||
if (above_in_image) {
|
||||
context_size = xd->above_mi->sb_type;
|
||||
if (context_size < bsize)
|
||||
above_par = 2;
|
||||
else if (context_size == bsize)
|
||||
above_par = 1;
|
||||
}
|
||||
|
||||
if (left_in_image) {
|
||||
context_size = xd->left_mi->sb_type;
|
||||
if (context_size < bsize)
|
||||
left_par = 2;
|
||||
else if (context_size == bsize)
|
||||
left_par = 1;
|
||||
}
|
||||
|
||||
if (prev_mi) {
|
||||
context_size = prev_mi[0]->sb_type;
|
||||
if (context_size < bsize)
|
||||
last_par = 2;
|
||||
else if (context_size == bsize)
|
||||
last_par = 1;
|
||||
}
|
||||
|
||||
if (bsize == BLOCK_64X64)
|
||||
offset = 0;
|
||||
else if (bsize == BLOCK_32X32)
|
||||
offset = 8;
|
||||
else if (bsize == BLOCK_16X16)
|
||||
offset = 16;
|
||||
|
||||
// early termination score calculation
|
||||
clf = &classifiers[offset];
|
||||
mean = &train_mean[offset];
|
||||
sd = &train_stdm[offset];
|
||||
score = clf[0] * (((double)ctx->rate - mean[0]) / sd[0]) +
|
||||
clf[1] * (((double)ctx->dist - mean[1]) / sd[1]) +
|
||||
clf[2] * (((double)mag_mv / 2 - mean[2]) * sd[2]) +
|
||||
clf[3] * (((double)(left_par + above_par) / 2 - mean[3]) *
|
||||
sd[3]) +
|
||||
clf[4] * (((double)ctx->sum_eobs - mean[4]) / sd[4]) +
|
||||
clf[5] * (((double)cm->base_qindex - mean[5]) * sd[5]) +
|
||||
clf[6] * (((double)last_par - mean[6]) * sd[6]) + clf[7];
|
||||
if (score < 0) {
|
||||
do_split = 0;
|
||||
do_rect = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_FP_MB_STATS
|
||||
@ -2984,7 +3100,8 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
|
||||
pc_tree->partitioning = PARTITION_SPLIT;
|
||||
|
||||
// Rate and distortion based partition search termination clause.
|
||||
if (!x->e_mbd.lossless && ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
|
||||
if (!cpi->sf.ml_partition_search_early_termination &&
|
||||
!x->e_mbd.lossless && ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
|
||||
(best_rdc.dist < dist_breakout_thr &&
|
||||
best_rdc.rate < rate_breakout_thr))) {
|
||||
do_rect = 0;
|
||||
|
@ -71,7 +71,15 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi,
|
||||
sf->partition_search_breakout_thr.dist = (1 << 20);
|
||||
sf->partition_search_breakout_thr.rate = 80;
|
||||
|
||||
// Currently, the machine-learning based partition search early termination
|
||||
// is only used while VPXMIN(cm->width, cm->height) >= 480 and speed = 0.
|
||||
if (VPXMIN(cm->width, cm->height) >= 480) {
|
||||
sf->ml_partition_search_early_termination = 1;
|
||||
}
|
||||
|
||||
if (speed >= 1) {
|
||||
sf->ml_partition_search_early_termination = 0;
|
||||
|
||||
if (VPXMIN(cm->width, cm->height) >= 720) {
|
||||
sf->disable_split_mask =
|
||||
cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
|
||||
@ -586,6 +594,7 @@ void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) {
|
||||
// Some speed-up features even for best quality as minimal impact on quality.
|
||||
sf->partition_search_breakout_thr.dist = (1 << 19);
|
||||
sf->partition_search_breakout_thr.rate = 80;
|
||||
sf->ml_partition_search_early_termination = 0;
|
||||
|
||||
if (oxcf->mode == REALTIME) {
|
||||
set_rt_speed_feature_framesize_dependent(cpi, sf, oxcf->speed);
|
||||
|
@ -449,6 +449,9 @@ typedef struct SPEED_FEATURES {
|
||||
// Partition search early breakout thresholds.
|
||||
PARTITION_SEARCH_BREAKOUT_THR partition_search_breakout_thr;
|
||||
|
||||
// Machine-learning based partition search early termination
|
||||
int ml_partition_search_early_termination;
|
||||
|
||||
// Allow skipping partition search for still image frame
|
||||
int allow_partition_search_skip;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user