Make allow_exhaustive_searches feature no longer adaptive
A previous patch turned on the allow_exhaustive_searches feature only for FC_GRAPHICS_ANIMATION content. This patch further modified the feature by removing the exhaustive search limit, so that it is no longer adaptive. As a result, the two counters that recorded the number of motion searches were removed, which helped achieve determinism in row-based multi-threaded encoding. Tests showed that this patch did not make the encoder much slower. The speed feature is now controlled by exhaustive_searches_thresh, and allow_exhaustive_searches was removed. Also, the speed feature code was refactored to follow the general speed feature setting style. Change-Id: Ib96b182c4c8dfff4c1ab91d2497cc42bb9e5a4aa
This commit is contained in:
parent
30ef50b522
commit
bca4564683
@ -93,11 +93,6 @@ struct macroblock {
|
||||
int rddiv;
|
||||
int rdmult;
|
||||
int mb_energy;
|
||||
int *m_search_count_ptr;
|
||||
int *ex_search_count_ptr;
|
||||
#if CONFIG_MULTITHREAD
|
||||
pthread_mutex_t *search_count_mutex;
|
||||
#endif
|
||||
|
||||
// These are set to their default values at the beginning, and then adjusted
|
||||
// further in the encoding process.
|
||||
|
@ -4341,7 +4341,6 @@ void vp9_init_tile_data(VP9_COMP *cpi) {
|
||||
}
|
||||
}
|
||||
#if CONFIG_MULTITHREAD
|
||||
tile_data->search_count_mutex = NULL;
|
||||
tile_data->enc_row_mt_mutex = NULL;
|
||||
tile_data->row_base_thresh_freq_fact = NULL;
|
||||
#endif
|
||||
@ -4361,10 +4360,6 @@ void vp9_init_tile_data(VP9_COMP *cpi) {
|
||||
cpi->tplist[tile_row][tile_col] = tplist + tplist_count;
|
||||
tplist = cpi->tplist[tile_row][tile_col];
|
||||
tplist_count = get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2);
|
||||
|
||||
// Set up pointers to per thread motion search counters.
|
||||
this_tile->m_search_count = 0; // Count of motion search hits.
|
||||
this_tile->ex_search_count = 0; // Exhaustive mesh search hits.
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -4409,13 +4404,6 @@ void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td, int tile_row,
|
||||
const int mi_row_end = tile_info->mi_row_end;
|
||||
int mi_row;
|
||||
|
||||
// Set up pointers to per thread motion search counters.
|
||||
td->mb.m_search_count_ptr = &this_tile->m_search_count;
|
||||
td->mb.ex_search_count_ptr = &this_tile->ex_search_count;
|
||||
#if CONFIG_MULTITHREAD
|
||||
td->mb.search_count_mutex = this_tile->search_count_mutex;
|
||||
#endif
|
||||
|
||||
for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE)
|
||||
vp9_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
|
||||
}
|
||||
|
@ -281,15 +281,12 @@ typedef struct TileDataEnc {
|
||||
TileInfo tile_info;
|
||||
int thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
|
||||
int mode_map[BLOCK_SIZES][MAX_MODES];
|
||||
int m_search_count;
|
||||
int ex_search_count;
|
||||
FIRSTPASS_DATA fp_data;
|
||||
VP9RowMTSync row_mt_sync;
|
||||
|
||||
// Used for adaptive_rd_thresh with row multithreading
|
||||
int *row_base_thresh_freq_fact;
|
||||
#if CONFIG_MULTITHREAD
|
||||
pthread_mutex_t *search_count_mutex;
|
||||
pthread_mutex_t *enc_row_mt_mutex;
|
||||
#endif
|
||||
} TileDataEnc;
|
||||
|
@ -552,7 +552,6 @@ static int enc_row_mt_worker_hook(EncWorkerData *const thread_data,
|
||||
const VP9_COMMON *const cm = &cpi->common;
|
||||
const int tile_cols = 1 << cm->log2_tile_cols;
|
||||
int tile_row, tile_col;
|
||||
TileDataEnc *this_tile;
|
||||
int end_of_frame;
|
||||
int thread_id = thread_data->thread_id;
|
||||
int cur_tile_id = multi_thread_ctxt->thread_id_to_tile_id[thread_id];
|
||||
@ -574,13 +573,6 @@ static int enc_row_mt_worker_hook(EncWorkerData *const thread_data,
|
||||
tile_row = proc_job->tile_row_id;
|
||||
mi_row = proc_job->vert_unit_row_num * MI_BLOCK_SIZE;
|
||||
|
||||
this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
|
||||
thread_data->td->mb.m_search_count_ptr = &this_tile->m_search_count;
|
||||
thread_data->td->mb.ex_search_count_ptr = &this_tile->ex_search_count;
|
||||
#if CONFIG_MULTITHREAD
|
||||
thread_data->td->mb.search_count_mutex = this_tile->search_count_mutex;
|
||||
#endif
|
||||
|
||||
vp9_encode_sb_row(cpi, thread_data->td, tile_row, tile_col, mi_row);
|
||||
}
|
||||
}
|
||||
|
@ -1998,18 +1998,6 @@ static int full_pixel_exhaustive(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
int range = sf->mesh_patterns[0].range;
|
||||
int baseline_interval_divisor;
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
if (NULL != x->search_count_mutex) pthread_mutex_lock(x->search_count_mutex);
|
||||
#endif
|
||||
|
||||
// Keep track of number of exhaustive calls (this frame in this thread).
|
||||
++(*x->ex_search_count_ptr);
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
if (NULL != x->search_count_mutex)
|
||||
pthread_mutex_unlock(x->search_count_mutex);
|
||||
#endif
|
||||
|
||||
// Trap illegal values for interval and range for this function.
|
||||
if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
|
||||
(interval > range))
|
||||
@ -2367,32 +2355,6 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
|
||||
return best_sad;
|
||||
}
|
||||
|
||||
#define MIN_EX_SEARCH_LIMIT 128
|
||||
static int is_exhaustive_allowed(VP9_COMP *cpi, MACROBLOCK *x) {
|
||||
const SPEED_FEATURES *const sf = &cpi->sf;
|
||||
int is_exhaustive_allowed;
|
||||
int max_ex;
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
if (NULL != x->search_count_mutex) pthread_mutex_lock(x->search_count_mutex);
|
||||
#endif
|
||||
|
||||
max_ex = VPXMAX(MIN_EX_SEARCH_LIMIT,
|
||||
(*x->m_search_count_ptr * sf->max_exaustive_pct) / 100);
|
||||
|
||||
is_exhaustive_allowed = sf->allow_exhaustive_searches &&
|
||||
(sf->exhaustive_searches_thresh < INT_MAX) &&
|
||||
(*x->ex_search_count_ptr <= max_ex) &&
|
||||
!cpi->rc.is_src_frame_alt_ref;
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
if (NULL != x->search_count_mutex)
|
||||
pthread_mutex_unlock(x->search_count_mutex);
|
||||
#endif
|
||||
|
||||
return is_exhaustive_allowed;
|
||||
}
|
||||
|
||||
int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
|
||||
MV *mvp_full, int step_param, int search_method,
|
||||
int error_per_bit, int *cost_list, const MV *ref_mv,
|
||||
@ -2435,21 +2397,9 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
|
||||
MAX_MVSEARCH_STEPS - 1 - step_param, 1,
|
||||
cost_list, fn_ptr, ref_mv, tmp_mv);
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
if (NULL != x->search_count_mutex)
|
||||
pthread_mutex_lock(x->search_count_mutex);
|
||||
#endif
|
||||
|
||||
// Keep track of number of searches (this frame in this thread).
|
||||
++(*x->m_search_count_ptr);
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
if (NULL != x->search_count_mutex)
|
||||
pthread_mutex_unlock(x->search_count_mutex);
|
||||
#endif
|
||||
|
||||
// Should we allow a follow on exhaustive search?
|
||||
if (is_exhaustive_allowed(cpi, x)) {
|
||||
if ((sf->exhaustive_searches_thresh < INT_MAX) &&
|
||||
!cpi->rc.is_src_frame_alt_ref) {
|
||||
int64_t exhuastive_thr = sf->exhaustive_searches_thresh;
|
||||
exhuastive_thr >>=
|
||||
8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
|
||||
|
@ -116,11 +116,6 @@ void vp9_row_mt_mem_alloc(VP9_COMP *cpi) {
|
||||
for (tile_col = 0; tile_col < tile_cols; tile_col++) {
|
||||
TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
|
||||
|
||||
CHECK_MEM_ERROR(cm, this_tile->search_count_mutex,
|
||||
vpx_malloc(sizeof(*this_tile->search_count_mutex)));
|
||||
|
||||
pthread_mutex_init(this_tile->search_count_mutex, NULL);
|
||||
|
||||
CHECK_MEM_ERROR(cm, this_tile->enc_row_mt_mutex,
|
||||
vpx_malloc(sizeof(*this_tile->enc_row_mt_mutex)));
|
||||
|
||||
@ -170,9 +165,6 @@ void vp9_row_mt_mem_dealloc(VP9_COMP *cpi) {
|
||||
this_tile->row_base_thresh_freq_fact = NULL;
|
||||
}
|
||||
}
|
||||
pthread_mutex_destroy(this_tile->search_count_mutex);
|
||||
vpx_free(this_tile->search_count_mutex);
|
||||
this_tile->search_count_mutex = NULL;
|
||||
pthread_mutex_destroy(this_tile->enc_row_mt_mutex);
|
||||
vpx_free(this_tile->enc_row_mt_mutex);
|
||||
this_tile->enc_row_mt_mutex = NULL;
|
||||
|
@ -20,19 +20,14 @@ static MESH_PATTERN best_quality_mesh_pattern[MAX_MESH_STEP] = {
|
||||
{ 64, 4 }, { 28, 2 }, { 15, 1 }, { 7, 1 }
|
||||
};
|
||||
|
||||
#define MAX_MESH_SPEED 5 // Max speed setting for mesh motion method
|
||||
// Define 3 mesh density levels to control the number of searches.
|
||||
#define MESH_DENSITY_LEVELS 3
|
||||
static MESH_PATTERN
|
||||
good_quality_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
|
||||
{ { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
|
||||
good_quality_mesh_patterns[MESH_DENSITY_LEVELS][MAX_MESH_STEP] = {
|
||||
{ { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
|
||||
{ { 64, 8 }, { 14, 2 }, { 7, 1 }, { 7, 1 } },
|
||||
{ { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
|
||||
{ { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
|
||||
{ { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
|
||||
};
|
||||
static unsigned char good_quality_max_mesh_pct[MAX_MESH_SPEED + 1] = {
|
||||
50, 25, 15, 5, 1, 1
|
||||
};
|
||||
|
||||
// Intra only frames, golden frames (except alt ref overlays) and
|
||||
// alt ref frames tend to be coded at a higher than ambient quality
|
||||
@ -163,6 +158,7 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
|
||||
SPEED_FEATURES *sf,
|
||||
int speed) {
|
||||
const int boosted = frame_is_boosted(cpi);
|
||||
int i;
|
||||
|
||||
sf->tx_size_search_breakout = 1;
|
||||
sf->adaptive_rd_thresh = 1;
|
||||
@ -171,6 +167,19 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
|
||||
sf->use_square_partition_only = !frame_is_boosted(cpi);
|
||||
sf->use_square_only_threshold = BLOCK_16X16;
|
||||
|
||||
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
|
||||
sf->exhaustive_searches_thresh = (1 << 22);
|
||||
for (i = 0; i < MAX_MESH_STEP; ++i) {
|
||||
int mesh_density_level = 0;
|
||||
sf->mesh_patterns[i].range =
|
||||
good_quality_mesh_patterns[mesh_density_level][i].range;
|
||||
sf->mesh_patterns[i].interval =
|
||||
good_quality_mesh_patterns[mesh_density_level][i].interval;
|
||||
}
|
||||
} else {
|
||||
sf->exhaustive_searches_thresh = INT_MAX;
|
||||
}
|
||||
|
||||
if (speed >= 1) {
|
||||
if (cpi->oxcf.pass == 2) {
|
||||
TWO_PASS *const twopass = &cpi->twopass;
|
||||
@ -208,6 +217,10 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
|
||||
|
||||
sf->recode_tolerance_low = 15;
|
||||
sf->recode_tolerance_high = 30;
|
||||
|
||||
sf->exhaustive_searches_thresh =
|
||||
(cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 23)
|
||||
: INT_MAX;
|
||||
}
|
||||
|
||||
if (speed >= 2) {
|
||||
@ -229,6 +242,16 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
|
||||
sf->allow_partition_search_skip = 1;
|
||||
sf->recode_tolerance_low = 15;
|
||||
sf->recode_tolerance_high = 45;
|
||||
|
||||
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
|
||||
for (i = 0; i < MAX_MESH_STEP; ++i) {
|
||||
int mesh_density_level = 1;
|
||||
sf->mesh_patterns[i].range =
|
||||
good_quality_mesh_patterns[mesh_density_level][i].range;
|
||||
sf->mesh_patterns[i].interval =
|
||||
good_quality_mesh_patterns[mesh_density_level][i].interval;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (speed >= 3) {
|
||||
@ -247,6 +270,16 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
|
||||
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
|
||||
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
|
||||
sf->adaptive_interp_filter_search = 1;
|
||||
|
||||
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
|
||||
for (i = 0; i < MAX_MESH_STEP; ++i) {
|
||||
int mesh_density_level = 2;
|
||||
sf->mesh_patterns[i].range =
|
||||
good_quality_mesh_patterns[mesh_density_level][i].range;
|
||||
sf->mesh_patterns[i].interval =
|
||||
good_quality_mesh_patterns[mesh_density_level][i].interval;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (speed >= 4) {
|
||||
@ -325,7 +358,6 @@ static void set_rt_speed_feature_framesize_independent(
|
||||
sf->adaptive_rd_thresh = 1;
|
||||
sf->adaptive_rd_thresh_row_mt = 0;
|
||||
sf->use_fast_coef_costing = 1;
|
||||
sf->allow_exhaustive_searches = 0;
|
||||
sf->exhaustive_searches_thresh = INT_MAX;
|
||||
sf->allow_acl = 0;
|
||||
sf->copy_partition_flag = 0;
|
||||
@ -609,7 +641,6 @@ void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) {
|
||||
// and multiple threads match
|
||||
if (cpi->oxcf.row_mt_bit_exact) {
|
||||
sf->adaptive_rd_thresh = 0;
|
||||
sf->allow_exhaustive_searches = 0;
|
||||
sf->adaptive_pred_interp_filter = 0;
|
||||
}
|
||||
|
||||
@ -711,6 +742,16 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
|
||||
sf->adaptive_rd_thresh = 1;
|
||||
sf->tx_size_search_breakout = 1;
|
||||
|
||||
sf->exhaustive_searches_thresh =
|
||||
(cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 20)
|
||||
: INT_MAX;
|
||||
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
|
||||
for (i = 0; i < MAX_MESH_STEP; ++i) {
|
||||
sf->mesh_patterns[i].range = best_quality_mesh_pattern[i].range;
|
||||
sf->mesh_patterns[i].interval = best_quality_mesh_pattern[i].interval;
|
||||
}
|
||||
}
|
||||
|
||||
if (oxcf->mode == REALTIME)
|
||||
set_rt_speed_feature_framesize_independent(cpi, sf, oxcf->speed,
|
||||
oxcf->content);
|
||||
@ -720,32 +761,6 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
|
||||
cpi->full_search_sad = vp9_full_search_sad;
|
||||
cpi->diamond_search_sad = vp9_diamond_search_sad;
|
||||
|
||||
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
|
||||
sf->allow_exhaustive_searches = 1;
|
||||
if (oxcf->mode == BEST) {
|
||||
sf->exhaustive_searches_thresh = (1 << 20);
|
||||
sf->max_exaustive_pct = 100;
|
||||
for (i = 0; i < MAX_MESH_STEP; ++i) {
|
||||
sf->mesh_patterns[i].range = best_quality_mesh_pattern[i].range;
|
||||
sf->mesh_patterns[i].interval = best_quality_mesh_pattern[i].interval;
|
||||
}
|
||||
} else {
|
||||
int speed = (oxcf->speed > MAX_MESH_SPEED) ? MAX_MESH_SPEED : oxcf->speed;
|
||||
sf->exhaustive_searches_thresh = (1 << 22);
|
||||
sf->max_exaustive_pct = good_quality_max_mesh_pct[speed];
|
||||
if (speed > 0)
|
||||
sf->exhaustive_searches_thresh = sf->exhaustive_searches_thresh << 1;
|
||||
|
||||
for (i = 0; i < MAX_MESH_STEP; ++i) {
|
||||
sf->mesh_patterns[i].range = good_quality_mesh_patterns[speed][i].range;
|
||||
sf->mesh_patterns[i].interval =
|
||||
good_quality_mesh_patterns[speed][i].interval;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
sf->allow_exhaustive_searches = 0;
|
||||
}
|
||||
|
||||
// Slow quant, dct and trellis not worthwhile for first pass
|
||||
// so make sure they are always turned off.
|
||||
if (oxcf->pass == 1) sf->optimize_coefficients = 0;
|
||||
@ -783,7 +798,6 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
|
||||
// and multiple threads match
|
||||
if (cpi->oxcf.row_mt_bit_exact) {
|
||||
sf->adaptive_rd_thresh = 0;
|
||||
sf->allow_exhaustive_searches = 0;
|
||||
sf->adaptive_pred_interp_filter = 0;
|
||||
}
|
||||
|
||||
|
@ -325,15 +325,9 @@ typedef struct SPEED_FEATURES {
|
||||
// point for this motion search and limits the search range around it.
|
||||
int adaptive_motion_search;
|
||||
|
||||
// Flag for allowing some use of exhaustive searches;
|
||||
int allow_exhaustive_searches;
|
||||
|
||||
// Threshold for allowing exhaustive motion search.
|
||||
int exhaustive_searches_thresh;
|
||||
|
||||
// Maximum number of exhaustive searches for a frame.
|
||||
int max_exaustive_pct;
|
||||
|
||||
// Pattern to be used for any exhaustive mesh searches.
|
||||
MESH_PATTERN mesh_patterns[MAX_MESH_STEP];
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user