Apply machine learning-based early termination in VP9 partition search

This patch is based on Yang Xian's intern project code, with the following
further modifications:
1. Moved machine-learning related parameters into the context structure.
2. Corrected the calculation of sum_eobs.
3. Removed unused parameters and calculations.
4. Made it work with multiple tiles.
5. Added a speed feature for the machine-learning based partition search
early termination.
6. Re-organized the code.

The patch was rebased to the top-of-tree.

Borg test BDRATE result:
4k set:     PSNR: +0.144%; SSIM: +0.043%;
hdres set:  PSNR: +0.149%; SSIM: +0.269%;
midres set: PSNR: +0.127%; SSIM: +0.257%;

Average speed gain result:
4k clips: 22%;
hd clips: 23%;
midres clips: 15%.

Change-Id: I0220e93a8277e6a7ea4b2c34b605966e3b1584ac
This commit is contained in:
Yunqing Wang 2017-02-27 14:26:15 -08:00
parent 42a1b310e1
commit 670101439f
4 changed files with 142 additions and 10 deletions

View File

@ -71,6 +71,9 @@ typedef struct {
// search loop
MV pred_mv[MAX_REF_FRAMES];
INTERP_FILTER pred_interp_filter;
// Used for the machine learning-based early termination
int sum_eobs;
} PICK_MODE_CONTEXT;
typedef struct PC_TREE {

View File

@ -52,6 +52,33 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
int output_enabled, int mi_row, int mi_col,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx);
// Machine learning-based early termination parameters.
// Per-feature means subtracted from the raw features before they are scaled
// and weighted in the early-termination score computed in rd_pick_partition().
// The table holds three groups of 8 entries, selected by block size through
// an offset of 0 (BLOCK_64X64), 8 (BLOCK_32X32) or 16 (BLOCK_16X16).
// Entry [7] of each group is not read by the score computation.
static const double train_mean[24] = {
  // BLOCK_64X64
  303501.697372,   // [0] rate
  3042630.372158,  // [1] dist
  24.694696,       // [2] mag_mv / 2
  1.392182,        // [3] (left_par + above_par) / 2
  689.413511,      // [4] sum_eobs
  162.027012,      // [5] base_qindex
  1.478213,        // [6] last_par
  0.0,             // [7] unused
  // BLOCK_32X32
  135382.260230,  // [0] rate
  912738.513263,  // [1] dist
  28.845217,      // [2] mag_mv / 2
  1.515230,       // [3] (left_par + above_par) / 2
  544.158492,     // [4] sum_eobs
  131.807995,     // [5] base_qindex
  1.436863,       // [6] last_par
  0.0,            // [7] unused
  // BLOCK_16X16
  43682.377587,   // [0] rate
  208131.711766,  // [1] dist
  28.084737,      // [2] mag_mv / 2
  1.356677,       // [3] (left_par + above_par) / 2
  138.254122,     // [4] sum_eobs
  119.522553,     // [5] base_qindex
  1.252322,       // [6] last_par
  0.0             // [7] unused
};
// Per-feature scale factors applied to the mean-centered features in the
// early-termination score computed in rd_pick_partition(). Same layout as
// train_mean: three groups of 8 entries at offsets 0 (BLOCK_64X64),
// 8 (BLOCK_32X32) and 16 (BLOCK_16X16).
// The score computation DIVIDES by entries [0], [1] and [4], but MULTIPLIES
// by entries [2], [3], [5] and [6].
// NOTE(review): the multiplied entries are presumably stored as reciprocals
// of the standard deviation -- confirm against the training data.
// Entry [7] of each group is not read by the score computation.
static const double train_stdm[24] = {
  // BLOCK_64X64
  673689.212982,   // [0] rate (divisor)
  5996652.516628,  // [1] dist (divisor)
  0.024449,        // [2] mag_mv / 2 (multiplier)
  1.989792,        // [3] (left_par + above_par) / 2 (multiplier)
  985.880847,      // [4] sum_eobs (divisor)
  0.014638,        // [5] base_qindex (multiplier)
  2.001898,        // [6] last_par (multiplier)
  0.0,             // [7] unused
  // BLOCK_32X32
  208798.775332,   // [0] rate (divisor)
  1812548.443284,  // [1] dist (divisor)
  0.018693,        // [2] mag_mv / 2 (multiplier)
  1.838009,        // [3] (left_par + above_par) / 2 (multiplier)
  396.986910,      // [4] sum_eobs (divisor)
  0.015657,        // [5] base_qindex (multiplier)
  1.332541,        // [6] last_par (multiplier)
  0.0,             // [7] unused
  // BLOCK_16X16
  55888.847031,   // [0] rate (divisor)
  448587.962714,  // [1] dist (divisor)
  0.017900,       // [2] mag_mv / 2 (multiplier)
  1.904776,       // [3] (left_par + above_par) / 2 (multiplier)
  98.652832,      // [4] sum_eobs (divisor)
  0.016598,       // [5] base_qindex (multiplier)
  1.320992,       // [6] last_par (multiplier)
  0.0             // [7] unused
};
// Error tolerance: 0.01%-0.05%-0.1%
// Linear classifier coefficients for the early-termination score computed in
// rd_pick_partition(). Three groups of 8 entries, selected by block size
// through an offset of 0 (BLOCK_64X64), 8 (BLOCK_32X32) or 16 (BLOCK_16X16).
// Within a group, entries [0]..[6] weight the normalized features
// (rate, dist, mag_mv / 2, (left_par + above_par) / 2, sum_eobs, base_qindex,
// last_par) and entry [7] is added to the score as a constant term.
static const double classifiers[24] = {
  // BLOCK_64X64
  0.111736, 0.289977, 0.042219, 0.204765,
  0.120410, -0.143863, 0.282376, 0.847811,
  // BLOCK_32X32
  0.637161, 0.131570, 0.018636, 0.202134,
  0.112797, 0.028162, 0.182450, 1.124367,
  // BLOCK_16X16
  0.386133, 0.083700, 0.050028, 0.150873,
  0.061119, 0.109318, 0.127255, 0.625211
};
// This is used as a reference when computing the source variance for the
// purpose of activity masking.
// Eventually this should be replaced by custom no-reference routines,
@ -2684,6 +2711,18 @@ static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv,
}
#endif
// Per-transform-block visitor that adds one block's eob value to the running
// total kept in the PICK_MODE_CONTEXT passed through |arg|.
// rd_pick_partition() hands this function to
// vp9_foreach_transformed_block_in_plane() after resetting ctx->sum_eobs to 0.
//   plane, block : indices used to look up the eob in ctx->eobs_pbuf.
//   arg          : the PICK_MODE_CONTEXT whose sum_eobs is updated.
// row, col, plane_bsize and tx_size are part of the visitor signature but are
// not used here.
static void accumulate_eobs(int plane, int block, int row, int col,
                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                            void *arg) {
  PICK_MODE_CONTEXT *const pick_ctx = (PICK_MODE_CONTEXT *)arg;

  // Silence unused-parameter warnings for the arguments this visitor ignores.
  (void)tx_size;
  (void)plane_bsize;
  (void)col;
  (void)row;

  // NOTE(review): buffer index 1 of eobs_pbuf is hard-coded; presumably it is
  // the buffer holding the results of the partition evaluation -- confirm.
  pick_ctx->sum_eobs =
      pick_ctx->sum_eobs + pick_ctx->eobs_pbuf[plane][1][block];
}
// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
// unlikely to be selected depending on previous rate-distortion optimization
// results, for encoding speed-up.
@ -2863,15 +2902,92 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
best_rdc = this_rdc;
if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
// If all y, u, v transform blocks in this partition are skippable, and
// the dist & rate are within the thresholds, the partition search is
// terminated for current branch of the partition search tree.
if (!x->e_mbd.lossless && ctx->skippable &&
((best_rdc.dist < (dist_breakout_thr >> 2)) ||
(best_rdc.dist < dist_breakout_thr &&
best_rdc.rate < rate_breakout_thr))) {
do_split = 0;
do_rect = 0;
if (!cpi->sf.ml_partition_search_early_termination) {
// If all y, u, v transform blocks in this partition are skippable,
// and the dist & rate are within the thresholds, the partition search
// is terminated for current branch of the partition search tree.
if (!x->e_mbd.lossless && ctx->skippable &&
((best_rdc.dist < (dist_breakout_thr >> 2)) ||
(best_rdc.dist < dist_breakout_thr &&
best_rdc.rate < rate_breakout_thr))) {
do_split = 0;
do_rect = 0;
}
} else {
// Currently, the machine-learning based partition search early
// termination is only used while bsize is 16x16, 32x32 or 64x64,
// VPXMIN(cm->width, cm->height) >= 480, and speed = 0.
if (ctx->mic.mode >= INTRA_MODES && bsize >= BLOCK_16X16) {
const double *clf;
const double *mean;
const double *sd;
const int mag_mv =
abs(ctx->mic.mv[0].as_mv.col) + abs(ctx->mic.mv[0].as_mv.row);
const int left_in_image = !!xd->left_mi;
const int above_in_image = !!xd->above_mi;
MODE_INFO **prev_mi =
&cm->prev_mi_grid_visible[mi_col + cm->mi_stride * mi_row];
int above_par = 0; // above_partitioning
int left_par = 0; // left_partitioning
int last_par = 0; // last_partitioning
BLOCK_SIZE context_size;
double score;
int offset = 0;
assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]);
ctx->sum_eobs = 0;
vp9_foreach_transformed_block_in_plane(xd, bsize, 0,
accumulate_eobs, ctx);
if (above_in_image) {
context_size = xd->above_mi->sb_type;
if (context_size < bsize)
above_par = 2;
else if (context_size == bsize)
above_par = 1;
}
if (left_in_image) {
context_size = xd->left_mi->sb_type;
if (context_size < bsize)
left_par = 2;
else if (context_size == bsize)
left_par = 1;
}
if (prev_mi) {
context_size = prev_mi[0]->sb_type;
if (context_size < bsize)
last_par = 2;
else if (context_size == bsize)
last_par = 1;
}
if (bsize == BLOCK_64X64)
offset = 0;
else if (bsize == BLOCK_32X32)
offset = 8;
else if (bsize == BLOCK_16X16)
offset = 16;
// early termination score calculation
clf = &classifiers[offset];
mean = &train_mean[offset];
sd = &train_stdm[offset];
score = clf[0] * (((double)ctx->rate - mean[0]) / sd[0]) +
clf[1] * (((double)ctx->dist - mean[1]) / sd[1]) +
clf[2] * (((double)mag_mv / 2 - mean[2]) * sd[2]) +
clf[3] * (((double)(left_par + above_par) / 2 - mean[3]) *
sd[3]) +
clf[4] * (((double)ctx->sum_eobs - mean[4]) / sd[4]) +
clf[5] * (((double)cm->base_qindex - mean[5]) * sd[5]) +
clf[6] * (((double)last_par - mean[6]) * sd[6]) + clf[7];
if (score < 0) {
do_split = 0;
do_rect = 0;
}
}
}
#if CONFIG_FP_MB_STATS
@ -2984,7 +3100,8 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
pc_tree->partitioning = PARTITION_SPLIT;
// Rate and distortion based partition search termination clause.
if (!x->e_mbd.lossless && ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
if (!cpi->sf.ml_partition_search_early_termination &&
!x->e_mbd.lossless && ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
(best_rdc.dist < dist_breakout_thr &&
best_rdc.rate < rate_breakout_thr))) {
do_rect = 0;

View File

@ -71,7 +71,15 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi,
sf->partition_search_breakout_thr.dist = (1 << 20);
sf->partition_search_breakout_thr.rate = 80;
// Currently, the machine-learning based partition search early termination
// is only used while VPXMIN(cm->width, cm->height) >= 480 and speed = 0.
if (VPXMIN(cm->width, cm->height) >= 480) {
sf->ml_partition_search_early_termination = 1;
}
if (speed >= 1) {
sf->ml_partition_search_early_termination = 0;
if (VPXMIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask =
cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
@ -586,6 +594,7 @@ void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) {
// Some speed-up features even for best quality as minimal impact on quality.
sf->partition_search_breakout_thr.dist = (1 << 19);
sf->partition_search_breakout_thr.rate = 80;
sf->ml_partition_search_early_termination = 0;
if (oxcf->mode == REALTIME) {
set_rt_speed_feature_framesize_dependent(cpi, sf, oxcf->speed);

View File

@ -449,6 +449,9 @@ typedef struct SPEED_FEATURES {
// Partition search early breakout thresholds.
PARTITION_SEARCH_BREAKOUT_THR partition_search_breakout_thr;
// Machine-learning based partition search early termination
int ml_partition_search_early_termination;
// Allow skipping partition search for still image frame
int allow_partition_search_skip;