Early termination in encoding partition search

In the partition search, the encoder checks all possible
partitionings in the superblock's partition search tree.
This patch proposes a set of criteria for early termination
of the partition search, which decide whether or not to
terminate the search in the current branch based on the
"skippable" result of the quantized transform coefficients.
The "skippable" information is gathered during the
partition mode search, so no additional calculations are
introduced.

This patch gives significant encoding speed gains without
sacrificing the quality.

Borg test results:
1. At speed 1,
   stdhd set: psnr: +0.074%, ssim: +0.093%;
   derf set:  psnr: -0.024%, ssim: +0.011%;
2. At speed 2,
   stdhd set: psnr: +0.033%, ssim: +0.100%;
   derf set:  psnr: -0.062%, ssim: +0.003%;
3. At speed 3,
   stdhd set: psnr: +0.060%, ssim: +0.190%;
   derf set:  psnr: -0.064%, ssim: -0.002%;
4. At speed 4,
   stdhd set: psnr: +0.070%, ssim: +0.143%;
   derf set:  psnr: -0.104%, ssim: +0.039%;

The speedup ranges from several percent to 60+%.
                 speed1    speed2    speed3    speed4
(1080p, 100f):
old_town_cross:  48.2%     23.9%     20.8%     16.5%
park_joy:        11.4%     17.8%     29.4%     18.2%
pedestrian_area: 10.7%      4.0%      4.2%      2.4%
(720p, 200f):
mobcal:          68.1%     36.3%     34.4%     17.7%
parkrun:         15.8%     24.2%     37.1%     16.8%
shields:         45.1%     32.8%     30.1%      9.6%
(cif, 300f):
bus:              3.7%     10.4%     14.0%      7.9%
deadline:        13.6%     14.8%     12.6%     10.9%
mobile:           5.3%     11.5%     14.7%     10.7%

Change-Id: I246c38fb952ad762ce5e365711235b605f470a66
This commit is contained in:
Yunqing Wang 2014-08-14 17:25:21 -07:00
parent bb2a9abb1e
commit 4d2c376923
5 changed files with 56 additions and 13 deletions

View File

@ -34,6 +34,9 @@ typedef struct {
int num_4x4_blk; int num_4x4_blk;
int skip; int skip;
int skip_txfm[MAX_MB_PLANE]; int skip_txfm[MAX_MB_PLANE];
// For the current partition, skippable is nonzero only if all Y, U, and V
// transform blocks' coefficients are quantized to 0; otherwise it is 0.
int skippable;
int best_mode_index; int best_mode_index;
int hybrid_pred_diff; int hybrid_pred_diff;
int comp_pred_diff; int comp_pred_diff;

View File

@ -727,6 +727,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
p[i].eobs = ctx->eobs_pbuf[i][0]; p[i].eobs = ctx->eobs_pbuf[i][0];
} }
ctx->is_coded = 0; ctx->is_coded = 0;
ctx->skippable = 0;
x->skip_recode = 0; x->skip_recode = 0;
// Set to zero to make sure we do not use the previous encoded frame stats // Set to zero to make sure we do not use the previous encoded frame stats
@ -2158,8 +2159,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist); sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
if (sum_rd < best_rd) { if (sum_rd < best_rd) {
int64_t stop_thresh = 4096; int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_dist_thr;
int64_t stop_thresh_rd; int rate_breakout_thr = cpi->sf.partition_search_breakout_rate_thr;
best_rate = this_rate; best_rate = this_rate;
best_dist = this_dist; best_dist = this_dist;
@ -2167,14 +2168,18 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
if (bsize >= BLOCK_8X8) if (bsize >= BLOCK_8X8)
pc_tree->partitioning = PARTITION_NONE; pc_tree->partitioning = PARTITION_NONE;
// Adjust threshold according to partition size. // Adjust dist breakout threshold according to the partition size.
stop_thresh >>= 8 - (b_width_log2(bsize) + dist_breakout_thr >>= 8 - (b_width_log2(bsize) +
b_height_log2(bsize)); b_height_log2(bsize));
stop_thresh_rd = RDCOST(x->rdmult, x->rddiv, 0, stop_thresh); // If all y, u, v transform blocks in this partition are skippable, and
// If obtained distortion is very small, choose current partition // the dist & rate are within the thresholds, the partition search is
// and stop splitting. // terminated for current branch of the partition search tree.
if (!x->e_mbd.lossless && best_rd < stop_thresh_rd) { // The dist & rate thresholds are set to 0 at speed 0 to disable the
// early termination at that speed.
if (!x->e_mbd.lossless &&
(ctx->skippable && best_dist < dist_breakout_thr &&
best_rate < rate_breakout_thr)) {
do_split = 0; do_split = 0;
do_rect = 0; do_rect = 0;
} }

View File

@ -1720,12 +1720,14 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
int mode_index, int mode_index,
int64_t comp_pred_diff[REFERENCE_MODES], int64_t comp_pred_diff[REFERENCE_MODES],
const int64_t tx_size_diff[TX_MODES], const int64_t tx_size_diff[TX_MODES],
int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) { int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS],
int skippable) {
MACROBLOCKD *const xd = &x->e_mbd; MACROBLOCKD *const xd = &x->e_mbd;
// Take a snapshot of the coding context so it can be // Take a snapshot of the coding context so it can be
// restored if we decide to encode this way // restored if we decide to encode this way
ctx->skip = x->skip; ctx->skip = x->skip;
ctx->skippable = skippable;
ctx->best_mode_index = mode_index; ctx->best_mode_index = mode_index;
ctx->mic = *xd->mi[0]; ctx->mic = *xd->mi[0];
ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE]; ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
@ -2556,6 +2558,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS]; int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
MB_MODE_INFO best_mbmode; MB_MODE_INFO best_mbmode;
int best_mode_skippable = 0;
int mode_index, best_mode_index = -1; int mode_index, best_mode_index = -1;
unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
vp9_prob comp_mode_p; vp9_prob comp_mode_p;
@ -2963,6 +2966,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
best_rd = this_rd; best_rd = this_rd;
best_mbmode = *mbmi; best_mbmode = *mbmi;
best_skip2 = this_skip2; best_skip2 = this_skip2;
best_mode_skippable = skippable;
if (!x->select_tx_size) if (!x->select_tx_size)
swap_block_ptr(x, ctx, 1, 0, 0, max_plane); swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size], vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
@ -3119,8 +3124,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
} }
set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
store_coding_context(x, ctx, best_mode_index, store_coding_context(x, ctx, best_mode_index, best_pred_diff,
best_pred_diff, best_tx_diff, best_filter_diff); best_tx_diff, best_filter_diff, best_mode_skippable);
return best_rd; return best_rd;
} }
@ -3225,7 +3230,7 @@ int64_t vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, MACROBLOCK *x,
if (!x->select_tx_size) if (!x->select_tx_size)
swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE); swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE);
store_coding_context(x, ctx, THR_ZEROMV, store_coding_context(x, ctx, THR_ZEROMV,
best_pred_diff, best_tx_diff, best_filter_diff); best_pred_diff, best_tx_diff, best_filter_diff, 0);
return this_rd; return this_rd;
} }
@ -3830,7 +3835,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
store_coding_context(x, ctx, best_ref_index, store_coding_context(x, ctx, best_ref_index,
best_pred_diff, best_tx_diff, best_filter_diff); best_pred_diff, best_tx_diff, best_filter_diff, 0);
return best_rd; return best_rd;
} }

View File

@ -92,6 +92,12 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
sf->tx_size_search_breakout = 1; sf->tx_size_search_breakout = 1;
if (MIN(cm->width, cm->height) >= 720)
sf->partition_search_breakout_dist_thr = (1 << 23);
else
sf->partition_search_breakout_dist_thr = (1 << 21);
sf->partition_search_breakout_rate_thr = 500;
} }
if (speed >= 2) { if (speed >= 2) {
@ -120,6 +126,12 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
sf->auto_min_max_partition_size = CONSTRAIN_NEIGHBORING_MIN_MAX; sf->auto_min_max_partition_size = CONSTRAIN_NEIGHBORING_MIN_MAX;
sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION;
sf->adjust_partitioning_from_last_frame = 1; sf->adjust_partitioning_from_last_frame = 1;
if (MIN(cm->width, cm->height) >= 720)
sf->partition_search_breakout_dist_thr = (1 << 24);
else
sf->partition_search_breakout_dist_thr = (1 << 22);
sf->partition_search_breakout_rate_thr = 700;
} }
if (speed >= 3) { if (speed >= 3) {
@ -144,6 +156,12 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC; sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC; sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
sf->adaptive_interp_filter_search = 1; sf->adaptive_interp_filter_search = 1;
if (MIN(cm->width, cm->height) >= 720)
sf->partition_search_breakout_dist_thr = (1 << 25);
else
sf->partition_search_breakout_dist_thr = (1 << 23);
sf->partition_search_breakout_rate_thr = 1000;
} }
if (speed >= 4) { if (speed >= 4) {
@ -158,6 +176,12 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
sf->use_lp32x32fdct = 1; sf->use_lp32x32fdct = 1;
sf->use_fast_coef_updates = ONE_LOOP_REDUCED; sf->use_fast_coef_updates = ONE_LOOP_REDUCED;
sf->use_fast_coef_costing = 1; sf->use_fast_coef_costing = 1;
if (MIN(cm->width, cm->height) >= 720)
sf->partition_search_breakout_dist_thr = (1 << 26);
else
sf->partition_search_breakout_dist_thr = (1 << 24);
sf->partition_search_breakout_rate_thr = 1500;
} }
if (speed >= 5) { if (speed >= 5) {
@ -411,6 +435,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->recode_tolerance = 25; sf->recode_tolerance = 25;
sf->default_interp_filter = SWITCHABLE; sf->default_interp_filter = SWITCHABLE;
sf->tx_size_search_breakout = 0; sf->tx_size_search_breakout = 0;
sf->partition_search_breakout_dist_thr = 0;
sf->partition_search_breakout_rate_thr = 0;
if (oxcf->mode == REALTIME) if (oxcf->mode == REALTIME)
set_rt_speed_feature(cpi, sf, oxcf->speed, oxcf->content); set_rt_speed_feature(cpi, sf, oxcf->speed, oxcf->content);

View File

@ -393,6 +393,10 @@ typedef struct SPEED_FEATURES {
// mask for skip evaluation of certain interp_filter type. // mask for skip evaluation of certain interp_filter type.
INTERP_FILTER_MASK interp_filter_search_mask; INTERP_FILTER_MASK interp_filter_search_mask;
// Partition search early breakout thresholds.
int64_t partition_search_breakout_dist_thr;
int partition_search_breakout_rate_thr;
} SPEED_FEATURES; } SPEED_FEATURES;
struct VP9_COMP; struct VP9_COMP;