From 01a37177d167f2f837ae6d2de03a17511d1b38c9 Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Tue, 20 Aug 2013 14:34:17 -0700 Subject: [PATCH] Refactor rd_pick_partition for parameter control This commit changes the partition search order of superblocks from {SPLIT, NONE, HORZ, VERT} to {NONE, SPLIT, HORZ, VERT} for consistency with that of sub8x8 partition search. It enables the use of early termination in partition search for all block sizes. For ped_area_1080p 50 frames coded at 4000 kbps, it makes the runtime go down from 844305ms -> 818003ms (3% speed-up) at speed 0. This will further move towards making the in-search partition types configurable, hence unifying various speed-up approaches. Some speed 1 and 2 features are turned off during the refactoring process, including: disable_split_var_thresh using_small_partition_info Stricter constraints are applied to use_square_partition_only for right/bottom boundary blocks. Will bring back/refine these features subsequently. At this point, it makes derf set at speed 1 about 0.45% higher in compression performance, and 9% down in run-time. 
Change-Id: I3db9f9d1d1a0d6cbe2e50e49bd9eda1cf705f37c --- vp9/encoder/vp9_encodeframe.c | 377 +++++++++++++++------------------- vp9/encoder/vp9_onyx_if.c | 7 +- 2 files changed, 172 insertions(+), 212 deletions(-) diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 160833baf..802cf3795 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1671,29 +1671,19 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, int i, pl; BLOCK_SIZE_TYPE subsize; int this_rate, sum_rate = 0, best_rate = INT_MAX; - int64_t this_dist, sum_dist = 0, best_dist = INT_MAX; + int64_t this_dist, sum_dist = 0, best_dist = INT64_MAX; int64_t sum_rd = 0; - int do_split = 1, do_rect = 1; - // Override min_partition_size for edge blocks - int force_horz_split = mi_row + (ms >> 1) >= cm->mi_rows; - int force_vert_split = mi_col + (ms >> 1) >= cm->mi_cols; - const int partition_none_allowed = (bsize <= cpi->sf.max_partition_size || - !cpi->sf.auto_min_max_partition_size) && - !force_horz_split && - !force_vert_split; - const int partition_horz_allowed = (bsize <= cpi->sf.max_partition_size || - !cpi->sf.auto_min_max_partition_size) && - !cpi->sf.use_square_partition_only && - bsize >= BLOCK_8X8 && - !force_vert_split; - const int partition_vert_allowed = (bsize <= cpi->sf.max_partition_size || - !cpi->sf.auto_min_max_partition_size) && - !cpi->sf.use_square_partition_only && - bsize >= BLOCK_8X8 && - !force_horz_split; + int do_split = bsize >= BLOCK_8X8; + int do_rect = 1; + // Override skipping rectangular partition operations for edge blocks + const int force_horz_split = (mi_row + (ms >> 1) >= cm->mi_rows); + const int force_vert_split = (mi_col + (ms >> 1) >= cm->mi_cols); + + int partition_none_allowed = !force_horz_split && !force_vert_split; + int partition_horz_allowed = !force_vert_split && bsize >= BLOCK_8X8; + int partition_vert_allowed = !force_horz_split && bsize >= BLOCK_8X8; + int partition_split_done = 
0; - - (void) *tp_orig; if (bsize < BLOCK_8X8) { @@ -1707,6 +1697,24 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, } assert(mi_height_log2(bsize) == mi_width_log2(bsize)); + // Determine partition types in search according to the speed features. + // The threshold set here has to be of square block size. + if (cpi->sf.auto_min_max_partition_size) { + partition_none_allowed &= (bsize <= cpi->sf.max_partition_size && + bsize >= cpi->sf.min_partition_size); + partition_horz_allowed &= ((bsize <= cpi->sf.max_partition_size && + bsize > cpi->sf.min_partition_size) || + force_horz_split); + partition_vert_allowed &= ((bsize <= cpi->sf.max_partition_size && + bsize > cpi->sf.min_partition_size) || + force_vert_split); + do_split &= bsize > cpi->sf.min_partition_size; + } + if (cpi->sf.use_square_partition_only) { + partition_horz_allowed &= force_horz_split; + partition_vert_allowed &= force_vert_split; + } + save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); if (cpi->sf.disable_split_var_thresh && partition_none_allowed) { @@ -1719,220 +1727,169 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, do_rect = 0; } } - // PARTITION_SPLIT - if (do_split && - (!cpi->sf.auto_min_max_partition_size || - bsize > cpi->sf.min_partition_size)) { - if (bsize > BLOCK_8X8) { - subsize = get_subsize(bsize, PARTITION_SPLIT); - for (i = 0; i < 4 && sum_rd < best_rd; ++i) { - int x_idx = (i & 1) * (ms >> 1); - int y_idx = (i >> 1) * (ms >> 1); - if ((mi_row + y_idx >= cm->mi_rows) || - (mi_col + x_idx >= cm->mi_cols)) - continue; - - *(get_sb_index(xd, subsize)) = i; - - rd_pick_partition(cpi, tp, mi_row + y_idx, mi_col + x_idx, subsize, - &this_rate, &this_dist, i != 3, best_rd - sum_rd); - - if (this_rate == INT_MAX) { - sum_rd = INT64_MAX; - } else { - sum_rate += this_rate; - sum_dist += this_dist; - sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); - } - } - if (sum_rd < best_rd && i == 4) { + // PARTITION_NONE + if 
(partition_none_allowed) { + pick_sb_modes(cpi, mi_row, mi_col, &this_rate, &this_dist, bsize, + get_block_context(x, bsize), best_rd); + if (this_rate != INT_MAX) { + if (bsize >= BLOCK_8X8) { set_partition_seg_context(cm, xd, mi_row, mi_col); pl = partition_plane_context(xd, bsize); - sum_rate += x->partition_cost[pl][PARTITION_SPLIT]; - sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); - if (sum_rd < best_rd) { - best_rate = sum_rate; - best_dist = sum_dist; - best_rd = sum_rd; - *(get_sb_partitioning(x, bsize)) = subsize; - } + this_rate += x->partition_cost[pl][PARTITION_NONE]; + } + sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist); + if (sum_rd < best_rd) { + best_rate = this_rate; + best_dist = this_dist; + best_rd = sum_rd; + if (bsize >= BLOCK_8X8) + *(get_sb_partitioning(x, bsize)) = bsize; } - partition_split_done = 1; - restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); } + restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + } + + // PARTITION_SPLIT + sum_rd = 0; + // TODO(jingning): use the motion vectors given by the above search as + // the starting point of motion search in the following partition type check. 
+ if (do_split) { + subsize = get_subsize(bsize, PARTITION_SPLIT); + for (i = 0; i < 4 && sum_rd < best_rd; ++i) { + int x_idx = (i & 1) * (ms >> 1); + int y_idx = (i >> 1) * (ms >> 1); + + if ((mi_row + y_idx >= cm->mi_rows) || + (mi_col + x_idx >= cm->mi_cols)) + continue; + + *(get_sb_index(xd, subsize)) = i; + + rd_pick_partition(cpi, tp, mi_row + y_idx, mi_col + x_idx, subsize, + &this_rate, &this_dist, i != 3, best_rd - sum_rd); + + if (this_rate == INT_MAX) { + sum_rd = INT64_MAX; + } else { + sum_rate += this_rate; + sum_dist += this_dist; + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + } + } + if (sum_rd < best_rd && i == 4) { + set_partition_seg_context(cm, xd, mi_row, mi_col); + pl = partition_plane_context(xd, bsize); + sum_rate += x->partition_cost[pl][PARTITION_SPLIT]; + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + if (sum_rd < best_rd) { + best_rate = sum_rate; + best_dist = sum_dist; + best_rd = sum_rd; + *(get_sb_partitioning(x, bsize)) = subsize; + } else { + // skip rectangular partition test when larger block size + // gives better rd cost + do_rect &= !partition_none_allowed && + cpi->sf.less_rectangular_check; + } + } + partition_split_done = 1; + restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); } x->fast_ms = 0; x->pred_mv.as_int = 0; x->subblock_ref = 0; - // Use 4 subblocks' motion estimation results to speed up current - // partition's checking. 
if (partition_split_done && cpi->sf.using_small_partition_info) { compute_fast_motion_search_level(cpi, bsize); } - if (!cpi->sf.auto_min_max_partition_size || - bsize <= cpi->sf.max_partition_size) { - int larger_is_better = 0; + // PARTITION_HORZ + if (partition_horz_allowed && do_rect) { + subsize = get_subsize(bsize, PARTITION_HORZ); + *(get_sb_index(xd, subsize)) = 0; + pick_sb_modes(cpi, mi_row, mi_col, &sum_rate, &sum_dist, subsize, + get_block_context(x, subsize), best_rd); + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); - // PARTITION_NONE - if (partition_none_allowed) { - pick_sb_modes(cpi, mi_row, mi_col, &this_rate, &this_dist, bsize, - get_block_context(x, bsize), best_rd); - if (this_rate != INT_MAX) { - if (bsize >= BLOCK_8X8) { - set_partition_seg_context(cm, xd, mi_row, mi_col); - pl = partition_plane_context(xd, bsize); - this_rate += x->partition_cost[pl][PARTITION_NONE]; - } - sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist); - if (sum_rd < best_rd || bsize == BLOCK_8X8) { - best_rate = this_rate; - best_dist = this_dist; - best_rd = sum_rd; - larger_is_better = 1; - if (bsize >= BLOCK_8X8) - *(get_sb_partitioning(x, bsize)) = bsize; - } - } - } + if (sum_rd < best_rd && mi_row + (ms >> 1) < cm->mi_rows) { + update_state(cpi, get_block_context(x, subsize), subsize, 0); + encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); - if (bsize == BLOCK_8X8 && do_split) { - sum_rate = 0; sum_dist = 0; sum_rd = 0; - - subsize = get_subsize(bsize, PARTITION_SPLIT); - - for (i = 0; i < 4 && sum_rd < best_rd; ++i) { - int x_idx = (i & 1) * (ms >> 1); - int y_idx = (i >> 1) * (ms >> 1); - - if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) - continue; - - *(get_sb_index(xd, subsize)) = i; - rd_pick_partition(cpi, tp, mi_row + y_idx, mi_col + x_idx, subsize, - &this_rate, &this_dist, i != 3, best_rd - sum_rd); - - if (this_rate == INT_MAX) { - sum_rd = INT64_MAX; - } else { - sum_rate += this_rate; - sum_dist += 
this_dist; - sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); - } - } - if (sum_rd < best_rd && i == 4) { - set_partition_seg_context(cm, xd, mi_row, mi_col); - pl = partition_plane_context(xd, bsize); - sum_rate += x->partition_cost[pl][PARTITION_SPLIT]; + *(get_sb_index(xd, subsize)) = 1; + pick_sb_modes(cpi, mi_row + (ms >> 1), mi_col, &this_rate, + &this_dist, subsize, get_block_context(x, subsize), + best_rd - sum_rd); + if (this_rate == INT_MAX) { + sum_rd = INT64_MAX; + } else { + sum_rate += this_rate; + sum_dist += this_dist; sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); - if (sum_rd < best_rd) { - best_rate = sum_rate; - best_dist = sum_dist; - best_rd = sum_rd; - larger_is_better = 0; - *(get_sb_partitioning(x, bsize)) = subsize; - } - } - restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); - } - - if (do_rect && - !cpi->sf.use_square_partition_only && - (!cpi->sf.less_rectangular_check || !larger_is_better)) { - // PARTITION_HORZ - if (partition_horz_allowed) { - subsize = get_subsize(bsize, PARTITION_HORZ); - if (!cpi->sf.auto_min_max_partition_size || force_horz_split || - subsize >= cpi->sf.min_partition_size) { - *(get_sb_index(xd, subsize)) = 0; - pick_sb_modes(cpi, mi_row, mi_col, &sum_rate, &sum_dist, subsize, - get_block_context(x, subsize), best_rd); - sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); - - if (sum_rd < best_rd && mi_row + (ms >> 1) < cm->mi_rows) { - update_state(cpi, get_block_context(x, subsize), subsize, 0); - encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); - - *(get_sb_index(xd, subsize)) = 1; - pick_sb_modes(cpi, mi_row + (ms >> 1), mi_col, &this_rate, - &this_dist, subsize, get_block_context(x, subsize), - best_rd - sum_rd); - if (this_rate == INT_MAX) { - sum_rd = INT64_MAX; - } else { - sum_rate += this_rate; - sum_dist += this_dist; - sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); - } - } - if (sum_rd < best_rd) { - set_partition_seg_context(cm, xd, mi_row, mi_col); 
- pl = partition_plane_context(xd, bsize); - sum_rate += x->partition_cost[pl][PARTITION_HORZ]; - sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); - if (sum_rd < best_rd) { - best_rd = sum_rd; - best_rate = sum_rate; - best_dist = sum_dist; - *(get_sb_partitioning(x, bsize)) = subsize; - } - } - restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); - } - } - - // PARTITION_VERT - if (partition_vert_allowed) { - subsize = get_subsize(bsize, PARTITION_VERT); - if (!cpi->sf.auto_min_max_partition_size || force_vert_split || - subsize >= cpi->sf.min_partition_size) { - *(get_sb_index(xd, subsize)) = 0; - pick_sb_modes(cpi, mi_row, mi_col, &sum_rate, &sum_dist, subsize, - get_block_context(x, subsize), best_rd); - sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); - if (sum_rd < best_rd && mi_col + (ms >> 1) < cm->mi_cols) { - update_state(cpi, get_block_context(x, subsize), subsize, 0); - encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); - - *(get_sb_index(xd, subsize)) = 1; - pick_sb_modes(cpi, mi_row, mi_col + (ms >> 1), &this_rate, - &this_dist, subsize, get_block_context(x, subsize), - best_rd - sum_rd); - if (this_rate == INT_MAX) { - sum_rd = INT64_MAX; - } else { - sum_rate += this_rate; - sum_dist += this_dist; - sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); - } - } - if (sum_rd < best_rd) { - set_partition_seg_context(cm, xd, mi_row, mi_col); - pl = partition_plane_context(xd, bsize); - sum_rate += x->partition_cost[pl][PARTITION_VERT]; - sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); - if (sum_rd < best_rd) { - best_rate = sum_rate; - best_dist = sum_dist; - best_rd = sum_rd; - *(get_sb_partitioning(x, bsize)) = subsize; - } - } - restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); - } } } + if (sum_rd < best_rd) { + set_partition_seg_context(cm, xd, mi_row, mi_col); + pl = partition_plane_context(xd, bsize); + sum_rate += x->partition_cost[pl][PARTITION_HORZ]; + sum_rd = RDCOST(x->rdmult, x->rddiv, 
sum_rate, sum_dist); + if (sum_rd < best_rd) { + best_rd = sum_rd; + best_rate = sum_rate; + best_dist = sum_dist; + *(get_sb_partitioning(x, bsize)) = subsize; + } + } + restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); } + + // PARTITION_VERT + if (partition_vert_allowed && do_rect) { + subsize = get_subsize(bsize, PARTITION_VERT); + + *(get_sb_index(xd, subsize)) = 0; + pick_sb_modes(cpi, mi_row, mi_col, &sum_rate, &sum_dist, subsize, + get_block_context(x, subsize), best_rd); + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + if (sum_rd < best_rd && mi_col + (ms >> 1) < cm->mi_cols) { + update_state(cpi, get_block_context(x, subsize), subsize, 0); + encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); + + *(get_sb_index(xd, subsize)) = 1; + pick_sb_modes(cpi, mi_row, mi_col + (ms >> 1), &this_rate, + &this_dist, subsize, get_block_context(x, subsize), + best_rd - sum_rd); + if (this_rate == INT_MAX) { + sum_rd = INT64_MAX; + } else { + sum_rate += this_rate; + sum_dist += this_dist; + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + } + } + if (sum_rd < best_rd) { + set_partition_seg_context(cm, xd, mi_row, mi_col); + pl = partition_plane_context(xd, bsize); + sum_rate += x->partition_cost[pl][PARTITION_VERT]; + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + if (sum_rd < best_rd) { + best_rate = sum_rate; + best_dist = sum_dist; + best_rd = sum_rd; + *(get_sb_partitioning(x, bsize)) = subsize; + } + } + restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + } + + *rate = best_rate; *dist = best_dist; - restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); - - if (best_rate < INT_MAX && best_dist < INT_MAX && do_recon) + if (best_rate < INT_MAX && best_dist < INT64_MAX && do_recon) encode_sb(cpi, tp, mi_row, mi_col, bsize == BLOCK_64X64, bsize); if (bsize == BLOCK_64X64) { assert(tp_orig < *tp); diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index cef12ffa6..83dc90426 100644 --- 
a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -793,7 +793,10 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->auto_min_max_partition_size = 1; sf->auto_min_max_partition_interval = 1; - sf->disable_split_var_thresh = 16; + // FIXME(jingning): temporarily turn off disable_split_var_thresh + // during refactoring process. will get this back after finishing + // the main framework of partition search type. + sf->disable_split_var_thresh = 0; sf->disable_filter_search_var_thresh = 16; } if (speed == 2) { @@ -821,7 +824,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->use_rd_breakout = 1; sf->skip_encode_sb = 1; sf->use_lp32x32fdct = 1; - sf->using_small_partition_info = 1; + sf->using_small_partition_info = 0; sf->disable_splitmv = (MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0; sf->auto_mv_step_size = 1;