vpx/vp9/encoder/vp9_speed_features.c
Jingning Han ca2dcb7fed Chessboard pattern partition search
This commit enables a chessboard pattern constrained partition
search for 720p and above resolutions. The scheme applies stricter
partition search to alternative blocks based on its above/left
neighboring blocks' partition range, as well as that of the
collocated blocks in the previous frame. It is currently turned
on at 16x16 block size level. The chessboard pattern is flipped
per coding frame.

The speed 3 runtime is reduced:
park_joy_1080p, 652832 ms -> 607738 ms (7% speed-up)
pedestrian_area_1080p, 215998 ms -> 200589 ms (8% speed-up)

The compression performance is changed:
hd     -0.223%
stdhd  -0.295%

Change-Id: I2d4d123ae89f7171562f618febb4d81789575b19
2014-07-30 10:32:41 -07:00

427 lines
15 KiB
C

/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <limits.h>
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_speed_features.h"
enum {
INTRA_ALL = (1 << DC_PRED) |
(1 << V_PRED) | (1 << H_PRED) |
(1 << D45_PRED) | (1 << D135_PRED) |
(1 << D117_PRED) | (1 << D153_PRED) |
(1 << D207_PRED) | (1 << D63_PRED) |
(1 << TM_PRED),
INTRA_DC = (1 << DC_PRED),
INTRA_DC_TM = (1 << DC_PRED) | (1 << TM_PRED),
INTRA_DC_H_V = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED),
INTRA_DC_TM_H_V = (1 << DC_PRED) | (1 << TM_PRED) | (1 << V_PRED) |
(1 << H_PRED)
};
enum {
INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV),
INTER_NEAREST = (1 << NEARESTMV),
INTER_NEAREST_NEAR_NEW = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV)
};
enum {
DISABLE_ALL_INTER_SPLIT = (1 << THR_COMP_GA) |
(1 << THR_COMP_LA) |
(1 << THR_ALTR) |
(1 << THR_GOLD) |
(1 << THR_LAST),
DISABLE_ALL_SPLIT = (1 << THR_INTRA) | DISABLE_ALL_INTER_SPLIT,
DISABLE_COMPOUND_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA),
LAST_AND_INTRA_SPLIT_ONLY = (1 << THR_COMP_GA) |
(1 << THR_COMP_LA) |
(1 << THR_ALTR) |
(1 << THR_GOLD)
};
static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
SPEED_FEATURES *sf, int speed) {
sf->adaptive_rd_thresh = 1;
sf->recode_loop = (speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW;
sf->allow_skip_recode = 1;
if (speed >= 1) {
sf->use_square_partition_only = !frame_is_intra_only(cm);
sf->less_rectangular_check = 1;
sf->tx_size_search_method = frame_is_boosted(cpi) ? USE_FULL_RD
: USE_LARGESTALL;
if (MIN(cm->width, cm->height) >= 720)
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
: DISABLE_ALL_INTER_SPLIT;
else
sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
sf->use_rd_breakout = 1;
sf->adaptive_motion_search = 1;
sf->mv.auto_mv_step_size = 1;
sf->adaptive_rd_thresh = 2;
sf->mv.subpel_iters_per_step = 1;
sf->mode_skip_start = 10;
sf->adaptive_pred_interp_filter = 1;
sf->recode_loop = ALLOW_RECODE_KFARFGF;
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
}
if (speed >= 2) {
if (MIN(cm->width, cm->height) >= 720) {
sf->lf_motion_threshold = LOW_MOTION_THRESHOLD;
sf->last_partitioning_redo_frequency = 3;
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
: DISABLE_ALL_INTER_SPLIT;
sf->adaptive_pred_interp_filter = 0;
} else {
sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
sf->last_partitioning_redo_frequency = 2;
sf->lf_motion_threshold = NO_MOTION_THRESHOLD;
}
sf->reference_masking = 1;
sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH |
FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA |
FLAG_SKIP_INTRA_LOWVAR;
sf->disable_filter_search_var_thresh = 100;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION;
sf->adjust_partitioning_from_last_frame = 1;
}
if (speed >= 3) {
sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD
: USE_LARGESTALL;
if (MIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask = DISABLE_ALL_SPLIT;
sf->cb_partition_search = frame_is_boosted(cpi) ? 0 : 1;
} else {
sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT;
}
sf->adaptive_pred_interp_filter = 0;
sf->cb_pred_filter_search = 1;
sf->lf_motion_threshold = LOW_MOTION_THRESHOLD;
sf->last_partitioning_redo_frequency = 3;
sf->recode_loop = ALLOW_RECODE_KFMAXBW;
sf->adaptive_rd_thresh = 3;
sf->mode_skip_start = 6;
sf->use_fast_coef_updates = ONE_LOOP_REDUCED;
sf->use_fast_coef_costing = 1;
}
if (speed >= 4) {
sf->use_square_partition_only = 1;
sf->tx_size_search_method = USE_LARGESTALL;
sf->disable_split_mask = DISABLE_ALL_SPLIT;
sf->adaptive_rd_thresh = 4;
sf->mode_search_skip_flags |= FLAG_SKIP_COMP_REFMISMATCH |
FLAG_EARLY_TERMINATE;
sf->disable_filter_search_var_thresh = 200;
sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL;
sf->use_lp32x32fdct = 1;
}
if (speed >= 5) {
int i;
sf->partition_search_type = FIXED_PARTITION;
sf->optimize_coefficients = 0;
sf->mv.search_method = HEX;
sf->disable_filter_search_var_thresh = 500;
for (i = 0; i < TX_SIZES; ++i) {
sf->intra_y_mode_mask[i] = INTRA_DC;
sf->intra_uv_mode_mask[i] = INTRA_DC;
}
cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
}
if (speed >= 6) {
sf->mv.reduce_first_step_size = 1;
}
}
static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
int speed) {
VP9_COMMON *const cm = &cpi->common;
const int frames_since_key =
cm->frame_type == KEY_FRAME ? 0 : cpi->rc.frames_since_key;
sf->static_segmentation = 0;
sf->adaptive_rd_thresh = 1;
sf->use_fast_coef_costing = 1;
if (speed >= 1) {
sf->use_square_partition_only = !frame_is_intra_only(cm);
sf->less_rectangular_check = 1;
sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD
: USE_LARGESTALL;
if (MIN(cm->width, cm->height) >= 720)
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
: DISABLE_ALL_INTER_SPLIT;
else
sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
sf->use_rd_breakout = 1;
sf->adaptive_motion_search = 1;
sf->adaptive_pred_interp_filter = 1;
sf->mv.auto_mv_step_size = 1;
sf->adaptive_rd_thresh = 2;
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
}
if (speed >= 2) {
if (MIN(cm->width, cm->height) >= 720)
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
: DISABLE_ALL_INTER_SPLIT;
else
sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH |
FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA |
FLAG_SKIP_INTRA_LOWVAR;
sf->adaptive_pred_interp_filter = 2;
sf->reference_masking = 1;
sf->disable_filter_search_var_thresh = 50;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION;
sf->lf_motion_threshold = LOW_MOTION_THRESHOLD;
sf->adjust_partitioning_from_last_frame = 1;
sf->last_partitioning_redo_frequency = 3;
sf->use_lp32x32fdct = 1;
sf->mode_skip_start = 11;
sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
}
if (speed >= 3) {
sf->use_square_partition_only = 1;
sf->disable_filter_search_var_thresh = 100;
sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL;
sf->constrain_copy_partition = 1;
sf->use_uv_intra_rd_estimate = 1;
sf->skip_encode_sb = 1;
sf->mv.subpel_iters_per_step = 1;
sf->use_fast_coef_updates = ONE_LOOP_REDUCED;
sf->adaptive_rd_thresh = 4;
sf->mode_skip_start = 6;
sf->allow_skip_recode = 0;
sf->optimize_coefficients = 0;
sf->disable_split_mask = DISABLE_ALL_SPLIT;
sf->lpf_pick = LPF_PICK_FROM_Q;
}
if (speed >= 4) {
int i;
sf->last_partitioning_redo_frequency = 4;
sf->adaptive_rd_thresh = 5;
sf->use_fast_coef_costing = 0;
sf->auto_min_max_partition_size = STRICT_NEIGHBORING_MIN_MAX;
sf->adjust_partitioning_from_last_frame =
cm->last_frame_type != cm->frame_type || (0 ==
(frames_since_key + 1) % sf->last_partitioning_redo_frequency);
sf->mv.subpel_force_stop = 1;
for (i = 0; i < TX_SIZES; i++) {
sf->intra_y_mode_mask[i] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[i] = INTRA_DC;
}
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
sf->frame_parameter_update = 0;
sf->mv.search_method = FAST_HEX;
sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEAR_NEW;
sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST;
sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST;
sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST;
sf->max_intra_bsize = BLOCK_32X32;
sf->allow_skip_recode = 1;
}
if (speed >= 5) {
sf->use_quant_fp = cm->frame_type == KEY_FRAME ? 0 : 1;
sf->auto_min_max_partition_size = (cm->frame_type == KEY_FRAME) ?
RELAXED_NEIGHBORING_MIN_MAX : STRICT_NEIGHBORING_MIN_MAX;
sf->max_partition_size = BLOCK_32X32;
sf->min_partition_size = BLOCK_8X8;
sf->partition_check =
(frames_since_key % sf->last_partitioning_redo_frequency == 1);
sf->force_frame_boost = cm->frame_type == KEY_FRAME ||
(frames_since_key %
(sf->last_partitioning_redo_frequency << 1) == 1);
sf->max_delta_qindex = (cm->frame_type == KEY_FRAME) ? 20 : 15;
sf->partition_search_type = REFERENCE_PARTITION;
sf->use_nonrd_pick_mode = 1;
sf->allow_skip_recode = 0;
}
if (speed >= 6) {
// Adaptively switch between SOURCE_VAR_BASED_PARTITION and FIXED_PARTITION.
sf->partition_search_type = SOURCE_VAR_BASED_PARTITION;
sf->search_type_check_frequency = 50;
sf->tx_size_search_method = (cm->frame_type == KEY_FRAME) ?
USE_LARGESTALL : USE_TX_8X8;
sf->max_intra_bsize = BLOCK_8X8;
// This feature is only enabled when partition search is disabled.
sf->reuse_inter_pred_sby = 1;
// Increase mode checking threshold for NEWMV.
sf->elevate_newmv_thresh = 2000;
sf->mv.reduce_first_step_size = 1;
}
if (speed >= 7) {
sf->mv.search_method = FAST_DIAMOND;
sf->mv.fullpel_search_step_param = 10;
sf->lpf_pick = LPF_PICK_MINIMAL_LPF;
sf->encode_breakout_thresh = (MIN(cm->width, cm->height) >= 720) ?
800 : 300;
sf->elevate_newmv_thresh = 2500;
}
if (speed >= 12) {
sf->elevate_newmv_thresh = 4000;
sf->mv.subpel_force_stop = 2;
}
if (speed >= 13) {
int i;
sf->max_intra_bsize = BLOCK_32X32;
for (i = 0; i < BLOCK_SIZES; ++i)
sf->inter_mode_mask[i] = INTER_NEAREST;
}
}
void vp9_set_speed_features(VP9_COMP *cpi) {
SPEED_FEATURES *const sf = &cpi->sf;
VP9_COMMON *const cm = &cpi->common;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
int i;
// best quality defaults
sf->frame_parameter_update = 1;
sf->mv.search_method = NSTEP;
sf->recode_loop = ALLOW_RECODE;
sf->mv.subpel_search_method = SUBPEL_TREE;
sf->mv.subpel_iters_per_step = 2;
sf->mv.subpel_force_stop = 0;
sf->optimize_coefficients = !is_lossless_requested(&cpi->oxcf);
sf->mv.reduce_first_step_size = 0;
sf->mv.auto_mv_step_size = 0;
sf->mv.fullpel_search_step_param = 6;
sf->comp_inter_joint_search_thresh = BLOCK_4X4;
sf->adaptive_rd_thresh = 0;
sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_OFF;
sf->tx_size_search_method = USE_FULL_RD;
sf->use_lp32x32fdct = 0;
sf->adaptive_motion_search = 0;
sf->adaptive_pred_interp_filter = 0;
sf->cb_pred_filter_search = 0;
sf->cb_partition_search = 0;
sf->use_quant_fp = 0;
sf->reference_masking = 0;
sf->partition_search_type = SEARCH_PARTITION;
sf->less_rectangular_check = 0;
sf->use_square_partition_only = 0;
sf->auto_min_max_partition_size = NOT_IN_USE;
sf->max_partition_size = BLOCK_64X64;
sf->min_partition_size = BLOCK_4X4;
sf->adjust_partitioning_from_last_frame = 0;
sf->last_partitioning_redo_frequency = 4;
sf->constrain_copy_partition = 0;
sf->disable_split_mask = 0;
sf->mode_search_skip_flags = 0;
sf->force_frame_boost = 0;
sf->max_delta_qindex = 0;
sf->disable_split_var_thresh = 0;
sf->disable_filter_search_var_thresh = 0;
for (i = 0; i < TX_SIZES; i++) {
sf->intra_y_mode_mask[i] = INTRA_ALL;
sf->intra_uv_mode_mask[i] = INTRA_ALL;
}
sf->use_rd_breakout = 0;
sf->skip_encode_sb = 0;
sf->use_uv_intra_rd_estimate = 0;
sf->allow_skip_recode = 0;
sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE;
sf->use_fast_coef_updates = TWO_LOOP;
sf->use_fast_coef_costing = 0;
sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set
sf->use_nonrd_pick_mode = 0;
for (i = 0; i < BLOCK_SIZES; ++i)
sf->inter_mode_mask[i] = INTER_ALL;
sf->max_intra_bsize = BLOCK_64X64;
sf->reuse_inter_pred_sby = 0;
// This setting only takes effect when partition_search_type is set
// to FIXED_PARTITION.
sf->always_this_block_size = BLOCK_16X16;
sf->search_type_check_frequency = 50;
sf->encode_breakout_thresh = 0;
sf->elevate_newmv_thresh = 0;
// Recode loop tolerence %.
sf->recode_tolerance = 25;
sf->default_interp_filter = SWITCHABLE;
switch (oxcf->mode) {
case ONE_PASS_BEST:
case TWO_PASS_SECOND_BEST: // This is the best quality mode.
cpi->diamond_search_sad = vp9_full_range_search;
break;
case TWO_PASS_FIRST:
case ONE_PASS_GOOD:
case TWO_PASS_SECOND_GOOD:
set_good_speed_feature(cpi, cm, sf, oxcf->speed);
break;
case REALTIME:
set_rt_speed_feature(cpi, sf, oxcf->speed);
break;
}
// Slow quant, dct and trellis not worthwhile for first pass
// so make sure they are always turned off.
if (cpi->pass == 1)
sf->optimize_coefficients = 0;
// No recode for 1 pass.
if (cpi->pass == 0) {
sf->recode_loop = DISALLOW_RECODE;
sf->optimize_coefficients = 0;
}
if (sf->mv.subpel_search_method == SUBPEL_TREE) {
cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree;
}
cpi->mb.optimize = sf->optimize_coefficients == 1 && cpi->pass != 1;
if (sf->disable_split_mask == DISABLE_ALL_SPLIT)
sf->adaptive_pred_interp_filter = 0;
if (!cpi->oxcf.frame_periodic_boost) {
sf->max_delta_qindex = 0;
}
if (cpi->encode_breakout && oxcf->mode == REALTIME &&
sf->encode_breakout_thresh > cpi->encode_breakout)
cpi->encode_breakout = sf->encode_breakout_thresh;
}