vpx/vp9/encoder/vp9_speed_features.c

859 lines
32 KiB
C
Raw Normal View History

/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <limits.h>
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_speed_features.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vpx_dsp/vpx_dsp_common.h"
Changes to exhaustive motion search. This change alters the nature and use of exhaustive motion search. Firstly any exhaustive search is preceded by a normal step search. The exhaustive search is only carried out if the distortion resulting from the step search is above a threshold value. Secondly the simple +/- 64 exhaustive search is replaced by a multi stage mesh based search where each stage has a range and step/interval size. Subsequent stages use the best position from the previous stage as the center of the search but use a reduced range and interval size. For example: stage 1: Range +/- 64 interval 4 stage 2: Range +/- 32 interval 2 stage 3: Range +/- 15 interval 1 This process, especially when it follows on from a normal step search, has shown itself to be almost as effective as a full range exhaustive search with step 1 but greatly lowers the computational complexity such that it can be used in some cases for speeds 0-2. This patch also removes a double exhaustive search for sub 8x8 blocks which also contained a bug (the two searches used different distortion metrics). For best quality in my test animation sequence this patch has almost no impact on quality but improves encode speed by more than 5X. Restricted use in good quality speeds 0-2 yields significant quality gains on the animation test of 0.2 - 0.5 db with only a small impact on encode speed. On most clips though the quality gain and speed impact are small. Change-Id: Id22967a840e996e1db273f6ac4ff03f4f52d49aa
2015-10-14 10:38:49 +01:00
// Mesh search patters for various speed settings
static MESH_PATTERN best_quality_mesh_pattern[MAX_MESH_STEP] = {
{ 64, 4 }, { 28, 2 }, { 15, 1 }, { 7, 1 }
};
Changes to exhaustive motion search. This change alters the nature and use of exhaustive motion search. Firstly any exhaustive search is preceded by a normal step search. The exhaustive search is only carried out if the distortion resulting from the step search is above a threshold value. Secondly the simple +/- 64 exhaustive search is replaced by a multi stage mesh based search where each stage has a range and step/interval size. Subsequent stages use the best position from the previous stage as the center of the search but use a reduced range and interval size. For example: stage 1: Range +/- 64 interval 4 stage 2: Range +/- 32 interval 2 stage 3: Range +/- 15 interval 1 This process, especially when it follows on from a normal step search, has shown itself to be almost as effective as a full range exhaustive search with step 1 but greatly lowers the computational complexity such that it can be used in some cases for speeds 0-2. This patch also removes a double exhaustive search for sub 8x8 blocks which also contained a bug (the two searches used different distortion metrics). For best quality in my test animation sequence this patch has almost no impact on quality but improves encode speed by more than 5X. Restricted use in good quality speeds 0-2 yields significant quality gains on the animation test of 0.2 - 0.5 db with only a small impact on encode speed. On most clips though the quality gain and speed impact are small. Change-Id: Id22967a840e996e1db273f6ac4ff03f4f52d49aa
2015-10-14 10:38:49 +01:00
// Define 3 mesh density levels to control the number of searches.
#define MESH_DENSITY_LEVELS 3
static MESH_PATTERN
good_quality_mesh_patterns[MESH_DENSITY_LEVELS][MAX_MESH_STEP] = {
{ { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
{ { 64, 8 }, { 14, 2 }, { 7, 1 }, { 7, 1 } },
{ { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
Changes to exhaustive motion search. This change alters the nature and use of exhaustive motion search. Firstly any exhaustive search is preceded by a normal step search. The exhaustive search is only carried out if the distortion resulting from the step search is above a threshold value. Secondly the simple +/- 64 exhaustive search is replaced by a multi stage mesh based search where each stage has a range and step/interval size. Subsequent stages use the best position from the previous stage as the center of the search but use a reduced range and interval size. For example: stage 1: Range +/- 64 interval 4 stage 2: Range +/- 32 interval 2 stage 3: Range +/- 15 interval 1 This process, especially when it follows on from a normal step search, has shown itself to be almost as effective as a full range exhaustive search with step 1 but greatly lowers the computational complexity such that it can be used in some cases for speeds 0-2. This patch also removes a double exhaustive search for sub 8x8 blocks which also contained a bug (the two searches used different distortion metrics). For best quality in my test animation sequence this patch has almost no impact on quality but improves encode speed by more than 5X. Restricted use in good quality speeds 0-2 yields significant quality gains on the animation test of 0.2 - 0.5 db with only a small impact on encode speed. On most clips though the quality gain and speed impact are small. Change-Id: Id22967a840e996e1db273f6ac4ff03f4f52d49aa
2015-10-14 10:38:49 +01:00
};
// Intra only frames, golden frames (except alt ref overlays) and
// alt ref frames tend to be coded at a higher than ambient quality
static int frame_is_boosted(const VP9_COMP *cpi) {
return frame_is_kf_gf_arf(cpi) || vp9_is_upper_layer_key_frame(cpi);
}
// Sets a partition size down to which the auto partition code will always
// search (can go lower), based on the image dimensions. The logic here
// is that the extent to which ringing artefacts are offensive, depends
// partly on the screen area that over which they propogate. Propogation is
// limited by transform block size but the screen area take up by a given block
// size will be larger for a small image format stretched to full screen.
static BLOCK_SIZE set_partition_min_limit(VP9_COMMON *const cm) {
unsigned int screen_area = (cm->width * cm->height);
// Select block size based on image format size.
if (screen_area < 1280 * 720) {
// Formats smaller in area than 720P
return BLOCK_4X4;
} else if (screen_area < 1920 * 1080) {
// Format >= 720P and < 1080P
return BLOCK_8X8;
} else {
// Formats 1080P and up
return BLOCK_16X16;
}
}
static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi,
SPEED_FEATURES *sf,
int speed) {
VP9_COMMON *const cm = &cpi->common;
// speed 0 features
sf->partition_search_breakout_thr.dist = (1 << 20);
sf->partition_search_breakout_thr.rate = 80;
// Currently, the machine-learning based partition search early termination
// is only used while VPXMIN(cm->width, cm->height) >= 480 and speed = 0.
if (VPXMIN(cm->width, cm->height) >= 480) {
sf->ml_partition_search_early_termination = 1;
}
if (speed >= 1) {
sf->ml_partition_search_early_termination = 0;
if (VPXMIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask =
cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
sf->partition_search_breakout_thr.dist = (1 << 23);
} else {
sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
sf->partition_search_breakout_thr.dist = (1 << 21);
}
}
if (speed >= 2) {
if (VPXMIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask =
cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
sf->adaptive_pred_interp_filter = 0;
sf->partition_search_breakout_thr.dist = (1 << 24);
sf->partition_search_breakout_thr.rate = 120;
} else {
sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
sf->partition_search_breakout_thr.dist = (1 << 22);
sf->partition_search_breakout_thr.rate = 100;
}
sf->rd_auto_partition_min_limit = set_partition_min_limit(cm);
// Use a set of speed features for 4k videos.
if (VPXMIN(cm->width, cm->height) >= 2160) {
sf->use_square_partition_only = 1;
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
sf->alt_ref_search_fp = 1;
sf->cb_pred_filter_search = 1;
sf->adaptive_interp_filter_search = 1;
sf->disable_split_mask = DISABLE_ALL_SPLIT;
}
}
if (speed >= 3) {
if (VPXMIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask = DISABLE_ALL_SPLIT;
sf->schedule_mode_search = cm->base_qindex < 220 ? 1 : 0;
sf->partition_search_breakout_thr.dist = (1 << 25);
sf->partition_search_breakout_thr.rate = 200;
} else {
sf->max_intra_bsize = BLOCK_32X32;
sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT;
sf->schedule_mode_search = cm->base_qindex < 175 ? 1 : 0;
sf->partition_search_breakout_thr.dist = (1 << 23);
sf->partition_search_breakout_thr.rate = 120;
}
}
// If this is a two pass clip that fits the criteria for animated or
// graphics content then reset disable_split_mask for speeds 1-4.
// Also if the image edge is internal to the coded area.
if ((speed >= 1) && (cpi->oxcf.pass == 2) &&
((cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ||
(vp9_internal_image_edge(cpi)))) {
sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
}
if (speed >= 4) {
sf->partition_search_breakout_thr.rate = 300;
if (VPXMIN(cm->width, cm->height) >= 720) {
sf->partition_search_breakout_thr.dist = (1 << 26);
} else {
sf->partition_search_breakout_thr.dist = (1 << 24);
}
sf->disable_split_mask = DISABLE_ALL_SPLIT;
}
if (speed >= 5) {
sf->partition_search_breakout_thr.rate = 500;
}
}
static double tx_dom_thresholds[6] = { 99.0, 14.0, 12.0, 8.0, 4.0, 0.0 };
static double qopt_thresholds[6] = { 99.0, 12.0, 10.0, 4.0, 2.0, 0.0 };
static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
VP9_COMMON *cm,
SPEED_FEATURES *sf,
int speed) {
const int boosted = frame_is_boosted(cpi);
int i;
sf->tx_size_search_breakout = 1;
sf->adaptive_rd_thresh = 1;
sf->adaptive_rd_thresh_row_mt = 0;
sf->allow_skip_recode = 1;
sf->less_rectangular_check = 1;
sf->use_square_partition_only = !frame_is_boosted(cpi);
sf->use_square_only_threshold = BLOCK_16X16;
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
sf->exhaustive_searches_thresh = (1 << 22);
for (i = 0; i < MAX_MESH_STEP; ++i) {
int mesh_density_level = 0;
sf->mesh_patterns[i].range =
good_quality_mesh_patterns[mesh_density_level][i].range;
sf->mesh_patterns[i].interval =
good_quality_mesh_patterns[mesh_density_level][i].interval;
}
} else {
sf->exhaustive_searches_thresh = INT_MAX;
}
if (speed >= 1) {
if (cpi->oxcf.pass == 2) {
TWO_PASS *const twopass = &cpi->twopass;
if ((twopass->fr_content_type == FC_GRAPHICS_ANIMATION) ||
vp9_internal_image_edge(cpi)) {
sf->use_square_partition_only = !frame_is_boosted(cpi);
} else {
sf->use_square_partition_only = !frame_is_intra_only(cm);
}
} else {
sf->use_square_partition_only = !frame_is_intra_only(cm);
}
sf->allow_txfm_domain_distortion = 1;
sf->tx_domain_thresh = tx_dom_thresholds[(speed < 6) ? speed : 5];
sf->allow_quant_coeff_opt = sf->optimize_coefficients;
sf->quant_opt_thresh = qopt_thresholds[(speed < 6) ? speed : 5];
sf->use_square_only_threshold = BLOCK_4X4;
sf->less_rectangular_check = 1;
sf->use_rd_breakout = 1;
sf->adaptive_motion_search = 1;
sf->mv.auto_mv_step_size = 1;
sf->adaptive_rd_thresh = 2;
sf->mv.subpel_iters_per_step = 1;
sf->mode_skip_start = 10;
sf->adaptive_pred_interp_filter = 1;
sf->allow_acl = 0;
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
sf->recode_tolerance_low = 15;
sf->recode_tolerance_high = 30;
sf->exhaustive_searches_thresh =
(cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 23)
: INT_MAX;
}
if (speed >= 2) {
sf->recode_loop = ALLOW_RECODE_KFARFGF;
sf->tx_size_search_method =
frame_is_boosted(cpi) ? USE_FULL_RD : USE_LARGESTALL;
// Reference masking is not supported in dynamic scaling mode.
sf->reference_masking = cpi->oxcf.resize_mode != RESIZE_DYNAMIC ? 1 : 0;
sf->mode_search_skip_flags =
(cm->frame_type == KEY_FRAME)
? 0
: FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR;
sf->disable_filter_search_var_thresh = 100;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->allow_partition_search_skip = 1;
sf->recode_tolerance_low = 15;
sf->recode_tolerance_high = 45;
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
for (i = 0; i < MAX_MESH_STEP; ++i) {
int mesh_density_level = 1;
sf->mesh_patterns[i].range =
good_quality_mesh_patterns[mesh_density_level][i].range;
sf->mesh_patterns[i].interval =
good_quality_mesh_patterns[mesh_density_level][i].interval;
}
}
}
if (speed >= 3) {
sf->use_square_partition_only = !frame_is_intra_only(cm);
sf->tx_size_search_method =
frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL;
sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED;
sf->adaptive_pred_interp_filter = 0;
sf->adaptive_mode_search = 1;
sf->cb_partition_search = !boosted;
sf->cb_pred_filter_search = 1;
sf->alt_ref_search_fp = 1;
sf->recode_loop = ALLOW_RECODE_KFMAXBW;
sf->adaptive_rd_thresh = 3;
sf->mode_skip_start = 6;
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
sf->adaptive_interp_filter_search = 1;
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
for (i = 0; i < MAX_MESH_STEP; ++i) {
int mesh_density_level = 2;
sf->mesh_patterns[i].range =
good_quality_mesh_patterns[mesh_density_level][i].range;
sf->mesh_patterns[i].interval =
good_quality_mesh_patterns[mesh_density_level][i].interval;
}
}
}
if (speed >= 4) {
sf->use_square_partition_only = 1;
sf->tx_size_search_method = USE_LARGESTALL;
sf->mv.search_method = BIGDIA;
sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
sf->adaptive_rd_thresh = 4;
if (cm->frame_type != KEY_FRAME)
sf->mode_search_skip_flags |= FLAG_EARLY_TERMINATE;
sf->disable_filter_search_var_thresh = 200;
sf->use_lp32x32fdct = 1;
sf->use_fast_coef_updates = ONE_LOOP_REDUCED;
sf->use_fast_coef_costing = 1;
sf->motion_field_mode_search = !boosted;
}
if (speed >= 5) {
int i;
sf->optimize_coefficients = 0;
sf->mv.search_method = HEX;
sf->disable_filter_search_var_thresh = 500;
for (i = 0; i < TX_SIZES; ++i) {
sf->intra_y_mode_mask[i] = INTRA_DC;
sf->intra_uv_mode_mask[i] = INTRA_DC;
}
sf->mv.reduce_first_step_size = 1;
sf->simple_model_rd_from_var = 1;
}
}
static void set_rt_speed_feature_framesize_dependent(VP9_COMP *cpi,
SPEED_FEATURES *sf,
int speed) {
VP9_COMMON *const cm = &cpi->common;
if (speed >= 1) {
if (VPXMIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask =
cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
} else {
sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
}
}
if (speed >= 2) {
if (VPXMIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask =
cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
} else {
sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
}
}
if (speed >= 5) {
sf->partition_search_breakout_thr.rate = 200;
if (VPXMIN(cm->width, cm->height) >= 720) {
sf->partition_search_breakout_thr.dist = (1 << 25);
} else {
sf->partition_search_breakout_thr.dist = (1 << 23);
}
}
if (speed >= 7) {
sf->encode_breakout_thresh =
(VPXMIN(cm->width, cm->height) >= 720) ? 800 : 300;
}
}
static void set_rt_speed_feature_framesize_independent(
VP9_COMP *cpi, SPEED_FEATURES *sf, int speed, vp9e_tune_content content) {
VP9_COMMON *const cm = &cpi->common;
const int is_keyframe = cm->frame_type == KEY_FRAME;
const int frames_since_key = is_keyframe ? 0 : cpi->rc.frames_since_key;
sf->static_segmentation = 0;
sf->adaptive_rd_thresh = 1;
sf->adaptive_rd_thresh_row_mt = 0;
sf->use_fast_coef_costing = 1;
Changes to exhaustive motion search. This change alters the nature and use of exhaustive motion search. Firstly any exhaustive search is preceded by a normal step search. The exhaustive search is only carried out if the distortion resulting from the step search is above a threshold value. Secondly the simple +/- 64 exhaustive search is replaced by a multi stage mesh based search where each stage has a range and step/interval size. Subsequent stages use the best position from the previous stage as the center of the search but use a reduced range and interval size. For example: stage 1: Range +/- 64 interval 4 stage 2: Range +/- 32 interval 2 stage 3: Range +/- 15 interval 1 This process, especially when it follows on from a normal step search, has shown itself to be almost as effective as a full range exhaustive search with step 1 but greatly lowers the computational complexity such that it can be used in some cases for speeds 0-2. This patch also removes a double exhaustive search for sub 8x8 blocks which also contained a bug (the two searches used different distortion metrics). For best quality in my test animation sequence this patch has almost no impact on quality but improves encode speed by more than 5X. Restricted use in good quality speeds 0-2 yields significant quality gains on the animation test of 0.2 - 0.5 db with only a small impact on encode speed. On most clips though the quality gain and speed impact are small. Change-Id: Id22967a840e996e1db273f6ac4ff03f4f52d49aa
2015-10-14 10:38:49 +01:00
sf->exhaustive_searches_thresh = INT_MAX;
sf->allow_acl = 0;
sf->copy_partition_flag = 0;
sf->use_source_sad = 0;
sf->use_simple_block_yrd = 0;
sf->adapt_partition_source_sad = 0;
sf->use_altref_onepass = 0;
if (speed >= 1) {
sf->allow_txfm_domain_distortion = 1;
sf->tx_domain_thresh = 0.0;
sf->allow_quant_coeff_opt = 0;
sf->quant_opt_thresh = 0.0;
sf->use_square_partition_only = !frame_is_intra_only(cm);
sf->less_rectangular_check = 1;
sf->tx_size_search_method =
frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL;
sf->use_rd_breakout = 1;
sf->adaptive_motion_search = 1;
sf->adaptive_pred_interp_filter = 1;
sf->mv.auto_mv_step_size = 1;
sf->adaptive_rd_thresh = 2;
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
}
if (speed >= 2) {
sf->mode_search_skip_flags =
(cm->frame_type == KEY_FRAME)
? 0
: FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR;
sf->adaptive_pred_interp_filter = 2;
// Reference masking only enabled for 1 spatial layer, and if none of the
// references have been scaled. The latter condition needs to be checked
// for external or internal dynamic resize.
sf->reference_masking = (cpi->svc.number_spatial_layers == 1);
if (sf->reference_masking == 1 &&
(cpi->external_resize == 1 ||
cpi->oxcf.resize_mode == RESIZE_DYNAMIC)) {
MV_REFERENCE_FRAME ref_frame;
static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
VP9_ALT_FLAG };
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
if (yv12 != NULL && (cpi->ref_frame_flags & flag_list[ref_frame])) {
const struct scale_factors *const scale_fac =
&cm->frame_refs[ref_frame - 1].sf;
if (vp9_is_scaled(scale_fac)) sf->reference_masking = 0;
}
}
}
sf->disable_filter_search_var_thresh = 50;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->lf_motion_threshold = LOW_MOTION_THRESHOLD;
sf->adjust_partitioning_from_last_frame = 1;
sf->last_partitioning_redo_frequency = 3;
sf->use_lp32x32fdct = 1;
sf->mode_skip_start = 11;
sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
}
if (speed >= 3) {
sf->use_square_partition_only = 1;
sf->disable_filter_search_var_thresh = 100;
sf->use_uv_intra_rd_estimate = 1;
sf->skip_encode_sb = 1;
sf->mv.subpel_iters_per_step = 1;
sf->adaptive_rd_thresh = 4;
sf->mode_skip_start = 6;
sf->allow_skip_recode = 0;
sf->optimize_coefficients = 0;
sf->disable_split_mask = DISABLE_ALL_SPLIT;
sf->lpf_pick = LPF_PICK_FROM_Q;
}
if (speed >= 4) {
int i;
if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0)
sf->use_altref_onepass = 1;
sf->last_partitioning_redo_frequency = 4;
sf->adaptive_rd_thresh = 5;
sf->use_fast_coef_costing = 0;
sf->auto_min_max_partition_size = STRICT_NEIGHBORING_MIN_MAX;
sf->adjust_partitioning_from_last_frame =
cm->last_frame_type != cm->frame_type ||
(0 == (frames_since_key + 1) % sf->last_partitioning_redo_frequency);
sf->mv.subpel_force_stop = 1;
for (i = 0; i < TX_SIZES; i++) {
sf->intra_y_mode_mask[i] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[i] = INTRA_DC;
}
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
sf->frame_parameter_update = 0;
sf->mv.search_method = FAST_HEX;
sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEAR_NEW;
sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST;
sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST;
sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST;
sf->max_intra_bsize = BLOCK_32X32;
sf->allow_skip_recode = 1;
}
if (speed >= 5) {
sf->use_altref_onepass = 0;
sf->use_quant_fp = !is_keyframe;
sf->auto_min_max_partition_size =
is_keyframe ? RELAXED_NEIGHBORING_MIN_MAX : STRICT_NEIGHBORING_MIN_MAX;
sf->default_max_partition_size = BLOCK_32X32;
sf->default_min_partition_size = BLOCK_8X8;
sf->force_frame_boost =
is_keyframe ||
(frames_since_key % (sf->last_partitioning_redo_frequency << 1) == 1);
sf->max_delta_qindex = is_keyframe ? 20 : 15;
sf->partition_search_type = REFERENCE_PARTITION;
if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0 &&
cpi->rc.is_src_frame_alt_ref) {
sf->partition_search_type = VAR_BASED_PARTITION;
}
sf->use_nonrd_pick_mode = 1;
sf->allow_skip_recode = 0;
sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEW_ZERO;
sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO;
sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO;
sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO;
sf->adaptive_rd_thresh = 2;
// This feature is only enabled when partition search is disabled.
sf->reuse_inter_pred_sby = 1;
sf->coeff_prob_appx_step = 4;
sf->use_fast_coef_updates = is_keyframe ? TWO_LOOP : ONE_LOOP_REDUCED;
sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH;
sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8;
sf->simple_model_rd_from_var = 1;
if (cpi->oxcf.rc_mode == VPX_VBR) sf->mv.search_method = NSTEP;
if (!is_keyframe) {
int i;
if (content == VP9E_CONTENT_SCREEN) {
for (i = 0; i < BLOCK_SIZES; ++i)
sf->intra_y_mode_bsize_mask[i] = INTRA_DC_TM_H_V;
} else {
for (i = 0; i < BLOCK_SIZES; ++i)
if (i > BLOCK_16X16)
sf->intra_y_mode_bsize_mask[i] = INTRA_DC;
else
// Use H and V intra mode for block sizes <= 16X16.
sf->intra_y_mode_bsize_mask[i] = INTRA_DC_H_V;
}
}
if (content == VP9E_CONTENT_SCREEN) {
sf->short_circuit_flat_blocks = 1;
}
if (cpi->oxcf.rc_mode == VPX_CBR &&
cpi->oxcf.content != VP9E_CONTENT_SCREEN) {
sf->limit_newmv_early_exit = 1;
if (!cpi->use_svc) sf->bias_golden = 1;
}
}
if (speed >= 6) {
if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0)
sf->use_altref_onepass = 1;
sf->partition_search_type = VAR_BASED_PARTITION;
// Turn on this to use non-RD key frame coding mode.
sf->use_nonrd_pick_mode = 1;
sf->mv.search_method = NSTEP;
sf->mv.reduce_first_step_size = 1;
sf->skip_encode_sb = 0;
if (!cpi->external_resize) sf->use_source_sad = 1;
if (sf->use_source_sad) {
sf->adapt_partition_source_sad = 1;
sf->adapt_partition_thresh =
(cm->width * cm->height <= 640 * 360) ? 40000 : 80000;
if (sf->use_altref_onepass && cpi->refresh_alt_ref_frame && !is_keyframe)
sf->adapt_partition_thresh = (3 * sf->adapt_partition_thresh) >> 2;
if (cpi->content_state_sb_fd == NULL &&
(!cpi->use_svc ||
cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) {
cpi->content_state_sb_fd = (uint8_t *)vpx_calloc(
(cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(uint8_t));
}
}
if (cpi->oxcf.rc_mode == VPX_CBR && content != VP9E_CONTENT_SCREEN) {
// Enable short circuit for low temporal variance.
sf->short_circuit_low_temp_var = 1;
}
if (cpi->svc.temporal_layer_id > 0) {
sf->adaptive_rd_thresh = 4;
sf->limit_newmv_early_exit = 0;
sf->base_mv_aggressive = 1;
}
}
if (speed >= 7) {
sf->adapt_partition_source_sad = 0;
sf->adaptive_rd_thresh = 3;
sf->mv.search_method = FAST_DIAMOND;
sf->mv.fullpel_search_step_param = 10;
// For SVC: use better mv search on base temporal layer, and only
// on base spatial layer if highest resolution is above 640x360.
if (cpi->svc.number_temporal_layers > 2 &&
cpi->svc.temporal_layer_id == 0 &&
(cpi->svc.spatial_layer_id == 0 ||
cpi->oxcf.width * cpi->oxcf.height <= 640 * 360)) {
sf->mv.search_method = NSTEP;
sf->mv.fullpel_search_step_param = 6;
}
if (cpi->svc.temporal_layer_id > 0 || cpi->svc.spatial_layer_id > 1) {
sf->use_simple_block_yrd = 1;
if (cpi->svc.non_reference_frame)
sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_EVENMORE;
}
// Enable partition copy. For SVC only enabled for top spatial resolution
// layer.
cpi->max_copied_frame = 0;
if (!cpi->last_frame_dropped && cpi->resize_state == ORIG &&
!cpi->external_resize &&
(!cpi->use_svc ||
cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) {
sf->copy_partition_flag = 1;
cpi->max_copied_frame = 2;
// The top temporal enhancement layer (for number of temporal layers > 1)
// are non-reference frames, so use large/max value for max_copied_frame.
if (cpi->svc.number_temporal_layers > 1 &&
cpi->svc.temporal_layer_id == cpi->svc.number_temporal_layers - 1)
cpi->max_copied_frame = 255;
}
}
if (speed >= 8) {
sf->adaptive_rd_thresh = 4;
sf->skip_encode_sb = 1;
if (!cpi->use_svc) cpi->max_copied_frame = 4;
if (cpi->row_mt && cpi->oxcf.max_threads > 1)
sf->adaptive_rd_thresh_row_mt = 1;
if (content == VP9E_CONTENT_SCREEN) sf->mv.subpel_force_stop = 3;
if (content == VP9E_CONTENT_SCREEN) sf->lpf_pick = LPF_PICK_MINIMAL_LPF;
// Only keep INTRA_DC mode for speed 8.
if (!is_keyframe) {
int i = 0;
for (i = 0; i < BLOCK_SIZES; ++i)
sf->intra_y_mode_bsize_mask[i] = INTRA_DC;
}
if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR &&
content != VP9E_CONTENT_SCREEN) {
// More aggressive short circuit for speed 8.
sf->short_circuit_low_temp_var = 3;
// Use level 2 for noisey cases as there is a regression in some
// noisy clips with level 3.
if (cpi->noise_estimate.enabled && cm->width >= 1280 &&
cm->height >= 720) {
NOISE_LEVEL noise_level =
vp9_noise_estimate_extract_level(&cpi->noise_estimate);
if (noise_level >= kMedium) sf->short_circuit_low_temp_var = 2;
}
// Since the short_circuit_low_temp_var is used, reduce the
// adaptive_rd_thresh level.
if (cm->width * cm->height > 352 * 288)
sf->adaptive_rd_thresh = 1;
else
sf->adaptive_rd_thresh = 2;
}
sf->limit_newmv_early_exit = 0;
sf->use_simple_block_yrd = 1;
}
if (sf->use_altref_onepass) {
if (cpi->rc.is_src_frame_alt_ref && cm->frame_type != KEY_FRAME) {
sf->partition_search_type = FIXED_PARTITION;
sf->always_this_block_size = BLOCK_64X64;
}
if (cpi->count_arf_frame_usage == NULL)
cpi->count_arf_frame_usage =
(uint8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
sizeof(*cpi->count_arf_frame_usage));
if (cpi->count_lastgolden_frame_usage == NULL)
cpi->count_lastgolden_frame_usage =
(uint8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
sizeof(*cpi->count_lastgolden_frame_usage));
}
}
void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) {
SPEED_FEATURES *const sf = &cpi->sf;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
RD_OPT *const rd = &cpi->rd;
int i;
// best quality defaults
// Some speed-up features even for best quality as minimal impact on quality.
sf->partition_search_breakout_thr.dist = (1 << 19);
sf->partition_search_breakout_thr.rate = 80;
sf->ml_partition_search_early_termination = 0;
if (oxcf->mode == REALTIME) {
set_rt_speed_feature_framesize_dependent(cpi, sf, oxcf->speed);
} else if (oxcf->mode == GOOD) {
set_good_speed_feature_framesize_dependent(cpi, sf, oxcf->speed);
}
if (sf->disable_split_mask == DISABLE_ALL_SPLIT) {
sf->adaptive_pred_interp_filter = 0;
}
if (cpi->encode_breakout && oxcf->mode == REALTIME &&
sf->encode_breakout_thresh > cpi->encode_breakout) {
cpi->encode_breakout = sf->encode_breakout_thresh;
}
// Check for masked out split cases.
for (i = 0; i < MAX_REFS; ++i) {
if (sf->disable_split_mask & (1 << i)) {
rd->thresh_mult_sub8x8[i] = INT_MAX;
}
}
// With row based multi-threading, the following speed features
// have to be disabled to guarantee that bitstreams encoded with single thread
// and multiple threads match.
// It can be used in realtime when adaptive_rd_thresh_row_mt is enabled since
// adaptive_rd_thresh is defined per-row for non-rd pickmode.
if (!sf->adaptive_rd_thresh_row_mt && cpi->row_mt_bit_exact &&
oxcf->max_threads > 1)
sf->adaptive_rd_thresh = 0;
// This is only used in motion vector unit test.
if (cpi->oxcf.motion_vector_unit_test == 1)
cpi->find_fractional_mv_step = vp9_return_max_sub_pixel_mv;
else if (cpi->oxcf.motion_vector_unit_test == 2)
cpi->find_fractional_mv_step = vp9_return_min_sub_pixel_mv;
}
void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
SPEED_FEATURES *const sf = &cpi->sf;
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->td.mb;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
int i;
// best quality defaults
sf->frame_parameter_update = 1;
sf->mv.search_method = NSTEP;
sf->recode_loop = ALLOW_RECODE_FIRST;
sf->mv.subpel_search_method = SUBPEL_TREE;
sf->mv.subpel_iters_per_step = 2;
sf->mv.subpel_force_stop = 0;
sf->optimize_coefficients = !is_lossless_requested(&cpi->oxcf);
sf->mv.reduce_first_step_size = 0;
sf->coeff_prob_appx_step = 1;
sf->mv.auto_mv_step_size = 0;
sf->mv.fullpel_search_step_param = 6;
sf->comp_inter_joint_search_thresh = BLOCK_4X4;
sf->tx_size_search_method = USE_FULL_RD;
sf->use_lp32x32fdct = 0;
sf->adaptive_motion_search = 0;
sf->adaptive_pred_interp_filter = 0;
sf->adaptive_mode_search = 0;
sf->cb_pred_filter_search = 0;
sf->cb_partition_search = 0;
sf->motion_field_mode_search = 0;
sf->alt_ref_search_fp = 0;
sf->use_quant_fp = 0;
sf->reference_masking = 0;
sf->partition_search_type = SEARCH_PARTITION;
sf->less_rectangular_check = 0;
sf->use_square_partition_only = 0;
sf->use_square_only_threshold = BLOCK_SIZES;
sf->auto_min_max_partition_size = NOT_IN_USE;
sf->rd_auto_partition_min_limit = BLOCK_4X4;
sf->default_max_partition_size = BLOCK_64X64;
sf->default_min_partition_size = BLOCK_4X4;
sf->adjust_partitioning_from_last_frame = 0;
sf->last_partitioning_redo_frequency = 4;
sf->disable_split_mask = 0;
sf->mode_search_skip_flags = 0;
sf->force_frame_boost = 0;
sf->max_delta_qindex = 0;
sf->disable_filter_search_var_thresh = 0;
sf->adaptive_interp_filter_search = 0;
sf->allow_partition_search_skip = 0;
sf->allow_txfm_domain_distortion = 0;
sf->tx_domain_thresh = 99.0;
sf->allow_quant_coeff_opt = sf->optimize_coefficients;
sf->quant_opt_thresh = 99.0;
sf->allow_acl = 1;
for (i = 0; i < TX_SIZES; i++) {
sf->intra_y_mode_mask[i] = INTRA_ALL;
sf->intra_uv_mode_mask[i] = INTRA_ALL;
}
sf->use_rd_breakout = 0;
sf->skip_encode_sb = 0;
sf->use_uv_intra_rd_estimate = 0;
sf->allow_skip_recode = 0;
sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE;
sf->use_fast_coef_updates = TWO_LOOP;
sf->use_fast_coef_costing = 0;
sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set
sf->schedule_mode_search = 0;
sf->use_nonrd_pick_mode = 0;
for (i = 0; i < BLOCK_SIZES; ++i) sf->inter_mode_mask[i] = INTER_ALL;
sf->max_intra_bsize = BLOCK_64X64;
sf->reuse_inter_pred_sby = 0;
// This setting only takes effect when partition_search_type is set
// to FIXED_PARTITION.
sf->always_this_block_size = BLOCK_16X16;
sf->search_type_check_frequency = 50;
sf->encode_breakout_thresh = 0;
// Recode loop tolerance %.
sf->recode_tolerance_low = 12;
sf->recode_tolerance_high = 25;
sf->default_interp_filter = SWITCHABLE;
sf->simple_model_rd_from_var = 0;
sf->short_circuit_flat_blocks = 0;
sf->short_circuit_low_temp_var = 0;
sf->limit_newmv_early_exit = 0;
sf->bias_golden = 0;
sf->base_mv_aggressive = 0;
// Some speed-up features even for best quality as minimal impact on quality.
sf->adaptive_rd_thresh = 1;
sf->tx_size_search_breakout = 1;
sf->exhaustive_searches_thresh =
(cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 20)
: INT_MAX;
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
for (i = 0; i < MAX_MESH_STEP; ++i) {
sf->mesh_patterns[i].range = best_quality_mesh_pattern[i].range;
sf->mesh_patterns[i].interval = best_quality_mesh_pattern[i].interval;
}
}
if (oxcf->mode == REALTIME)
set_rt_speed_feature_framesize_independent(cpi, sf, oxcf->speed,
oxcf->content);
else if (oxcf->mode == GOOD)
set_good_speed_feature_framesize_independent(cpi, cm, sf, oxcf->speed);
Changes to exhaustive motion search. This change alters the nature and use of exhaustive motion search. Firstly any exhaustive search is preceded by a normal step search. The exhaustive search is only carried out if the distortion resulting from the step search is above a threshold value. Secondly the simple +/- 64 exhaustive search is replaced by a multi stage mesh based search where each stage has a range and step/interval size. Subsequent stages use the best position from the previous stage as the center of the search but use a reduced range and interval size. For example: stage 1: Range +/- 64 interval 4 stage 2: Range +/- 32 interval 2 stage 3: Range +/- 15 interval 1 This process, especially when it follows on from a normal step search, has shown itself to be almost as effective as a full range exhaustive search with step 1 but greatly lowers the computational complexity such that it can be used in some cases for speeds 0-2. This patch also removes a double exhaustive search for sub 8x8 blocks which also contained a bug (the two searches used different distortion metrics). For best quality in my test animation sequence this patch has almost no impact on quality but improves encode speed by more than 5X. Restricted use in good quality speeds 0-2 yields significant quality gains on the animation test of 0.2 - 0.5 db with only a small impact on encode speed. On most clips though the quality gain and speed impact are small. Change-Id: Id22967a840e996e1db273f6ac4ff03f4f52d49aa
2015-10-14 10:38:49 +01:00
cpi->diamond_search_sad = vp9_diamond_search_sad;
// Slow quant, dct and trellis not worthwhile for first pass
// so make sure they are always turned off.
if (oxcf->pass == 1) sf->optimize_coefficients = 0;
// No recode for 1 pass.
if (oxcf->pass == 0) {
sf->recode_loop = DISALLOW_RECODE;
sf->optimize_coefficients = 0;
}
if (sf->mv.subpel_force_stop == 3) {
// Whole pel only
cpi->find_fractional_mv_step = vp9_skip_sub_pixel_tree;
} else if (sf->mv.subpel_search_method == SUBPEL_TREE) {
cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree;
} else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED) {
cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned;
} else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_MORE) {
cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned_more;
} else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_EVENMORE) {
cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned_evenmore;
}
x->optimize = sf->optimize_coefficients == 1 && oxcf->pass != 1;
x->min_partition_size = sf->default_min_partition_size;
x->max_partition_size = sf->default_max_partition_size;
if (!cpi->oxcf.frame_periodic_boost) {
sf->max_delta_qindex = 0;
}
// With row based multi-threading, the following speed features
// have to be disabled to guarantee that bitstreams encoded with single thread
// and multiple threads match.
// It can be used in realtime when adaptive_rd_thresh_row_mt is enabled since
// adaptive_rd_thresh is defined per-row for non-rd pickmode.
if (!sf->adaptive_rd_thresh_row_mt && cpi->row_mt_bit_exact &&
oxcf->max_threads > 1)
sf->adaptive_rd_thresh = 0;
// This is only used in motion vector unit test.
if (cpi->oxcf.motion_vector_unit_test == 1)
cpi->find_fractional_mv_step = vp9_return_max_sub_pixel_mv;
else if (cpi->oxcf.motion_vector_unit_test == 2)
cpi->find_fractional_mv_step = vp9_return_min_sub_pixel_mv;
}