From 4d2c37692345f588e00bd33faefcdb17bd27835b Mon Sep 17 00:00:00 2001 From: Yunqing Wang Date: Thu, 14 Aug 2014 17:25:21 -0700 Subject: [PATCH] Early termination in encoding partition search In the partition search, the encoder checks all possible partitionings in the superblock's partition search tree. This patch proposed a set of criteria for partition search early termination, which effectively decided whether or not to terminate the search in current branch based on the "skippable" result of the quantized transform coefficients. The "skippable" information was gathered during the partition mode search, and no overhead calculations were introduced. This patch gives significant encoding speed gains without sacrificing the quality. Borg test results: 1. At speed 1, stdhd set: psnr: +0.074%, ssim: +0.093%; derf set: psnr: -0.024%, ssim: +0.011%; 2. At speed 2, stdhd set: psnr: +0.033%, ssim: +0.100%; derf set: psnr: -0.062%, ssim: +0.003%; 3. At speed 3, stdhd set: psnr: +0.060%, ssim: +0.190%; derf set: psnr: -0.064%, ssim: -0.002%; 4. At speed 4, stdhd set: psnr: +0.070%, ssim: +0.143%; derf set: psnr: -0.104%, ssim: +0.039%; The speedup ranges from several percent to 60+%. speed1 speed2 speed3 speed4 (1080p, 100f): old_town_cross: 48.2% 23.9% 20.8% 16.5% park_joy: 11.4% 17.8% 29.4% 18.2% pedestrian_area: 10.7% 4.0% 4.2% 2.4% (720p, 200f): mobcal: 68.1% 36.3% 34.4% 17.7% parkrun: 15.8% 24.2% 37.1% 16.8% shields: 45.1% 32.8% 30.1% 9.6% (cif, 300f) bus: 3.7% 10.4% 14.0% 7.9% deadline: 13.6% 14.8% 12.6% 10.9% mobile: 5.3% 11.5% 14.7% 10.7% Change-Id: I246c38fb952ad762ce5e365711235b605f470a66 --- vp9/encoder/vp9_context_tree.h | 3 +++ vp9/encoder/vp9_encodeframe.c | 21 +++++++++++++-------- vp9/encoder/vp9_rdopt.c | 15 ++++++++++----- vp9/encoder/vp9_speed_features.c | 26 ++++++++++++++++++++++++++ vp9/encoder/vp9_speed_features.h | 4 ++++ 5 files changed, 56 insertions(+), 13 deletions(-) diff --git a/vp9/encoder/vp9_context_tree.h b/vp9/encoder/vp9_context_tree.h index d60e6c3eb..0cbb24429 100644 --- a/vp9/encoder/vp9_context_tree.h +++ b/vp9/encoder/vp9_context_tree.h @@ -34,6 +34,9 @@ typedef struct { int num_4x4_blk; int skip; int skip_txfm[MAX_MB_PLANE]; + // For current partition, only if all Y, U, and V transform blocks' + // coefficients are quantized to 0, skippable is set to 0. + int skippable; int best_mode_index; int hybrid_pred_diff; int comp_pred_diff; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 950a6c8bb..07134dc9d 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -727,6 +727,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, p[i].eobs = ctx->eobs_pbuf[i][0]; } ctx->is_coded = 0; + ctx->skippable = 0; x->skip_recode = 0; // Set to zero to make sure we do not use the previous encoded frame stats @@ -2158,8 +2159,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist); if (sum_rd < best_rd) { - int64_t stop_thresh = 4096; - int64_t stop_thresh_rd; + int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_dist_thr; + int rate_breakout_thr = cpi->sf.partition_search_breakout_rate_thr; best_rate = this_rate; best_dist = this_dist; @@ -2167,14 +2168,18 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; - // Adjust threshold according to partition size. - stop_thresh >>= 8 - (b_width_log2(bsize) + + // Adjust dist breakout threshold according to the partition size. + dist_breakout_thr >>= 8 - (b_width_log2(bsize) + b_height_log2(bsize)); - stop_thresh_rd = RDCOST(x->rdmult, x->rddiv, 0, stop_thresh); - // If obtained distortion is very small, choose current partition - // and stop splitting. - if (!x->e_mbd.lossless && best_rd < stop_thresh_rd) { + // If all y, u, v transform blocks in this partition are skippable, and + // the dist & rate are within the thresholds, the partition search is + // terminated for current branch of the partition search tree. + // The dist & rate thresholds are set to 0 at speed 0 to disable the + // early termination at that speed. + if (!x->e_mbd.lossless && + (ctx->skippable && best_dist < dist_breakout_thr && + best_rate < rate_breakout_thr)) { do_split = 0; do_rect = 0; } diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 2efa3db52..f73006ef7 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1720,12 +1720,14 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index, int64_t comp_pred_diff[REFERENCE_MODES], const int64_t tx_size_diff[TX_MODES], - int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) { + int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS], + int skippable) { MACROBLOCKD *const xd = &x->e_mbd; // Take a snapshot of the coding context so it can be // restored if we decide to encode this way ctx->skip = x->skip; + ctx->skippable = skippable; ctx->best_mode_index = mode_index; ctx->mic = *xd->mi[0]; ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE]; @@ -2556,6 +2558,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS]; int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; MB_MODE_INFO best_mbmode; + int best_mode_skippable = 0; int mode_index, best_mode_index = -1; unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; vp9_prob comp_mode_p; @@ -2963,6 +2966,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, best_rd = this_rd; best_mbmode = *mbmi; best_skip2 = this_skip2; + best_mode_skippable = skippable; + if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane); vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size], @@ -3119,8 +3124,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); - store_coding_context(x, ctx, best_mode_index, - best_pred_diff, best_tx_diff, best_filter_diff); + store_coding_context(x, ctx, best_mode_index, best_pred_diff, + best_tx_diff, best_filter_diff, best_mode_skippable); return best_rd; } @@ -3225,7 +3230,7 @@ int64_t vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, MACROBLOCK *x, if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE); store_coding_context(x, ctx, THR_ZEROMV, - best_pred_diff, best_tx_diff, best_filter_diff); + best_pred_diff, best_tx_diff, best_filter_diff, 0); return this_rd; } @@ -3830,7 +3835,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); store_coding_context(x, ctx, best_ref_index, - best_pred_diff, best_tx_diff, best_filter_diff); + best_pred_diff, best_tx_diff, best_filter_diff, 0); return best_rd; } diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 0afcde535..0bead48fe 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -92,6 +92,12 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; sf->tx_size_search_breakout = 1; + + if (MIN(cm->width, cm->height) >= 720) + sf->partition_search_breakout_dist_thr = (1 << 23); + else + sf->partition_search_breakout_dist_thr = (1 << 21); + sf->partition_search_breakout_rate_thr = 500; } if (speed >= 2) { @@ -120,6 +126,12 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, sf->auto_min_max_partition_size = CONSTRAIN_NEIGHBORING_MIN_MAX; sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; sf->adjust_partitioning_from_last_frame = 1; + + if (MIN(cm->width, cm->height) >= 720) + sf->partition_search_breakout_dist_thr = (1 << 24); + else + sf->partition_search_breakout_dist_thr = (1 << 22); + sf->partition_search_breakout_rate_thr = 700; } if (speed >= 3) { @@ -144,6 +156,12 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, sf->intra_y_mode_mask[TX_32X32] = INTRA_DC; sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC; sf->adaptive_interp_filter_search = 1; + + if (MIN(cm->width, cm->height) >= 720) + sf->partition_search_breakout_dist_thr = (1 << 25); + else + sf->partition_search_breakout_dist_thr = (1 << 23); + sf->partition_search_breakout_rate_thr = 1000; } if (speed >= 4) { @@ -158,6 +176,12 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, sf->use_lp32x32fdct = 1; sf->use_fast_coef_updates = ONE_LOOP_REDUCED; sf->use_fast_coef_costing = 1; + + if (MIN(cm->width, cm->height) >= 720) + sf->partition_search_breakout_dist_thr = (1 << 26); + else + sf->partition_search_breakout_dist_thr = (1 << 24); + sf->partition_search_breakout_rate_thr = 1500; } if (speed >= 5) { @@ -411,6 +435,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->recode_tolerance = 25; sf->default_interp_filter = SWITCHABLE; sf->tx_size_search_breakout = 0; + sf->partition_search_breakout_dist_thr = 0; + sf->partition_search_breakout_rate_thr = 0; if (oxcf->mode == REALTIME) set_rt_speed_feature(cpi, sf, oxcf->speed, oxcf->content); diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index 46eedc147..e31185e59 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -393,6 +393,10 @@ typedef struct SPEED_FEATURES { // mask for skip evaluation of certain interp_filter type. INTERP_FILTER_MASK interp_filter_search_mask; + + // Partition search early breakout thresholds. + int64_t partition_search_breakout_dist_thr; + int partition_search_breakout_rate_thr; } SPEED_FEATURES; struct VP9_COMP;