Changes to exhaustive motion search.
This change alters the nature and use of exhaustive motion search.

Firstly, any exhaustive search is preceded by a normal step search. The exhaustive search is only carried out if the distortion resulting from the step search is above a threshold value.

Secondly, the simple +/- 64 exhaustive search is replaced by a multi-stage, mesh-based search where each stage has a range and step/interval size. Subsequent stages use the best position from the previous stage as the center of the search but use a reduced range and interval size. For example:
stage 1: Range +/- 64 interval 4
stage 2: Range +/- 32 interval 2
stage 3: Range +/- 15 interval 1

This process, especially when it follows on from a normal step search, has shown itself to be almost as effective as a full range exhaustive search with step 1 but greatly lowers the computational complexity such that it can be used in some cases for speeds 0-2.

This patch also removes a double exhaustive search for sub 8x8 blocks which also contained a bug (the two searches used different distortion metrics).

For best quality in my test animation sequence this patch has almost no impact on quality but improves encode speed by more than 5X. Restricted use in good quality speeds 0-2 yields significant quality gains on the animation test of 0.2 - 0.5 dB with only a small impact on encode speed. On most clips, though, the quality gain and speed impact are small.

Change-Id: Id22967a840e996e1db273f6ac4ff03f4f52d49aa
This commit is contained in:
@@ -314,9 +314,6 @@ $vp9_full_search_sad_sse4_1=vp9_full_search_sadx8;
|
|||||||
add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
|
add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
|
||||||
specialize qw/vp9_diamond_search_sad avx/;
|
specialize qw/vp9_diamond_search_sad avx/;
|
||||||
|
|
||||||
add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
|
|
||||||
specialize qw/vp9_full_range_search/;
|
|
||||||
|
|
||||||
add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
|
add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
|
||||||
specialize qw/vp9_temporal_filter_apply sse2 msa/;
|
specialize qw/vp9_temporal_filter_apply sse2 msa/;
|
||||||
|
|
||||||
|
@@ -71,6 +71,8 @@ struct macroblock {
|
|||||||
int rddiv;
|
int rddiv;
|
||||||
int rdmult;
|
int rdmult;
|
||||||
int mb_energy;
|
int mb_energy;
|
||||||
|
int * m_search_count_ptr;
|
||||||
|
int * ex_search_count_ptr;
|
||||||
|
|
||||||
// These are set to their default values at the beginning, and then adjusted
|
// These are set to their default values at the beginning, and then adjusted
|
||||||
// further in the encoding process.
|
// further in the encoding process.
|
||||||
|
@@ -3839,6 +3839,10 @@ void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td,
|
|||||||
TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
|
TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
|
||||||
int mi_row;
|
int mi_row;
|
||||||
|
|
||||||
|
// Set up pointers to per thread motion search counters.
|
||||||
|
td->mb.m_search_count_ptr = &td->rd_counts.m_search_count;
|
||||||
|
td->mb.ex_search_count_ptr = &td->rd_counts.ex_search_count;
|
||||||
|
|
||||||
for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
|
for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
|
||||||
mi_row += MI_BLOCK_SIZE) {
|
mi_row += MI_BLOCK_SIZE) {
|
||||||
if (cpi->sf.use_nonrd_pick_mode)
|
if (cpi->sf.use_nonrd_pick_mode)
|
||||||
@@ -3895,6 +3899,9 @@ static void encode_frame_internal(VP9_COMP *cpi) {
|
|||||||
vp9_zero(rdc->coef_counts);
|
vp9_zero(rdc->coef_counts);
|
||||||
vp9_zero(rdc->comp_pred_diff);
|
vp9_zero(rdc->comp_pred_diff);
|
||||||
vp9_zero(rdc->filter_diff);
|
vp9_zero(rdc->filter_diff);
|
||||||
|
rdc->m_search_count = 0; // Count of motion search hits.
|
||||||
|
rdc->ex_search_count = 0; // Exhaustive mesh search hits.
|
||||||
|
|
||||||
|
|
||||||
xd->lossless = cm->base_qindex == 0 &&
|
xd->lossless = cm->base_qindex == 0 &&
|
||||||
cm->y_dc_delta_q == 0 &&
|
cm->y_dc_delta_q == 0 &&
|
||||||
|
@@ -2995,7 +2995,7 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
|
|||||||
recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
|
recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
|
||||||
|
|
||||||
if (cpi->twopass.total_left_stats.coded_error != 0.0)
|
if (cpi->twopass.total_left_stats.coded_error != 0.0)
|
||||||
fprintf(f, "%10u %dx%d %d %d %10d %10d %10d %10d"
|
fprintf(f, "%10u %dx%d %10d %10d %d %d %10d %10d %10d %10d"
|
||||||
"%10"PRId64" %10"PRId64" %5d %5d %10"PRId64" "
|
"%10"PRId64" %10"PRId64" %5d %5d %10"PRId64" "
|
||||||
"%10"PRId64" %10"PRId64" %10d "
|
"%10"PRId64" %10"PRId64" %10d "
|
||||||
"%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf"
|
"%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf"
|
||||||
@@ -3004,6 +3004,8 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
|
|||||||
"%10lf %8u %10"PRId64" %10d %10d %10d\n",
|
"%10lf %8u %10"PRId64" %10d %10d %10d\n",
|
||||||
cpi->common.current_video_frame,
|
cpi->common.current_video_frame,
|
||||||
cm->width, cm->height,
|
cm->width, cm->height,
|
||||||
|
cpi->td.rd_counts.m_search_count,
|
||||||
|
cpi->td.rd_counts.ex_search_count,
|
||||||
cpi->rc.source_alt_ref_pending,
|
cpi->rc.source_alt_ref_pending,
|
||||||
cpi->rc.source_alt_ref_active,
|
cpi->rc.source_alt_ref_active,
|
||||||
cpi->rc.this_frame_target,
|
cpi->rc.this_frame_target,
|
||||||
|
@@ -260,6 +260,8 @@ typedef struct RD_COUNTS {
|
|||||||
vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES];
|
vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES];
|
||||||
int64_t comp_pred_diff[REFERENCE_MODES];
|
int64_t comp_pred_diff[REFERENCE_MODES];
|
||||||
int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS];
|
int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS];
|
||||||
|
int m_search_count;
|
||||||
|
int ex_search_count;
|
||||||
} RD_COUNTS;
|
} RD_COUNTS;
|
||||||
|
|
||||||
typedef struct ThreadData {
|
typedef struct ThreadData {
|
||||||
|
@@ -30,6 +30,10 @@ static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) {
|
|||||||
for (n = 0; n < ENTROPY_TOKENS; n++)
|
for (n = 0; n < ENTROPY_TOKENS; n++)
|
||||||
td->rd_counts.coef_counts[i][j][k][l][m][n] +=
|
td->rd_counts.coef_counts[i][j][k][l][m][n] +=
|
||||||
td_t->rd_counts.coef_counts[i][j][k][l][m][n];
|
td_t->rd_counts.coef_counts[i][j][k][l][m][n];
|
||||||
|
|
||||||
|
// Counts of all motion searches and exhaustive mesh searches.
|
||||||
|
td->rd_counts.m_search_count += td_t->rd_counts.m_search_count;
|
||||||
|
td->rd_counts.ex_search_count += td_t->rd_counts.ex_search_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) {
|
static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) {
|
||||||
|
@@ -1517,53 +1517,66 @@ static int fast_dia_search(const MACROBLOCK *x,
|
|||||||
|
|
||||||
#undef CHECK_BETTER
|
#undef CHECK_BETTER
|
||||||
|
|
||||||
int vp9_full_range_search_c(const MACROBLOCK *x,
|
// Exhaustive motion search around a given centre position with a given
|
||||||
const search_site_config *cfg,
|
// step size.
|
||||||
|
static int exhuastive_mesh_search(const MACROBLOCK *x,
|
||||||
MV *ref_mv, MV *best_mv,
|
MV *ref_mv, MV *best_mv,
|
||||||
int search_param, int sad_per_bit, int *num00,
|
int range, int step, int sad_per_bit,
|
||||||
const vp9_variance_fn_ptr_t *fn_ptr,
|
const vp9_variance_fn_ptr_t *fn_ptr,
|
||||||
const MV *center_mv) {
|
const MV *center_mv) {
|
||||||
const MACROBLOCKD *const xd = &x->e_mbd;
|
const MACROBLOCKD *const xd = &x->e_mbd;
|
||||||
const struct buf_2d *const what = &x->plane[0].src;
|
const struct buf_2d *const what = &x->plane[0].src;
|
||||||
const struct buf_2d *const in_what = &xd->plane[0].pre[0];
|
const struct buf_2d *const in_what = &xd->plane[0].pre[0];
|
||||||
const int range = 64;
|
MV fcenter_mv = {center_mv->row, center_mv->col};
|
||||||
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
|
|
||||||
unsigned int best_sad = INT_MAX;
|
unsigned int best_sad = INT_MAX;
|
||||||
int r, c, i;
|
int r, c, i;
|
||||||
int start_col, end_col, start_row, end_row;
|
int start_col, end_col, start_row, end_row;
|
||||||
|
int col_step = (step > 1) ? step : 4;
|
||||||
|
|
||||||
// The cfg and search_param parameters are not used in this search variant
|
assert(step >= 1);
|
||||||
(void)cfg;
|
|
||||||
(void)search_param;
|
|
||||||
|
|
||||||
clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
|
clamp_mv(&fcenter_mv, x->mv_col_min, x->mv_col_max,
|
||||||
*best_mv = *ref_mv;
|
x->mv_row_min, x->mv_row_max);
|
||||||
*num00 = 11;
|
*best_mv = fcenter_mv;
|
||||||
best_sad = fn_ptr->sdf(what->buf, what->stride,
|
best_sad = fn_ptr->sdf(what->buf, what->stride,
|
||||||
get_buf_from_mv(in_what, ref_mv), in_what->stride) +
|
get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) +
|
||||||
mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
|
mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit);
|
||||||
start_row = VPXMAX(-range, x->mv_row_min - ref_mv->row);
|
start_row = VPXMAX(-range, x->mv_row_min - fcenter_mv.row);
|
||||||
start_col = VPXMAX(-range, x->mv_col_min - ref_mv->col);
|
start_col = VPXMAX(-range, x->mv_col_min - fcenter_mv.col);
|
||||||
end_row = VPXMIN(range, x->mv_row_max - ref_mv->row);
|
end_row = VPXMIN(range, x->mv_row_max - fcenter_mv.row);
|
||||||
end_col = VPXMIN(range, x->mv_col_max - ref_mv->col);
|
end_col = VPXMIN(range, x->mv_col_max - fcenter_mv.col);
|
||||||
|
|
||||||
for (r = start_row; r <= end_row; ++r) {
|
for (r = start_row; r <= end_row; r += step) {
|
||||||
for (c = start_col; c <= end_col; c += 4) {
|
for (c = start_col; c <= end_col; c += col_step) {
|
||||||
|
// Step > 1 means we are not checking every location in this pass.
|
||||||
|
if (step > 1) {
|
||||||
|
const MV mv = {fcenter_mv.row + r, fcenter_mv.col + c};
|
||||||
|
unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
|
||||||
|
get_buf_from_mv(in_what, &mv), in_what->stride);
|
||||||
|
if (sad < best_sad) {
|
||||||
|
sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
|
||||||
|
if (sad < best_sad) {
|
||||||
|
best_sad = sad;
|
||||||
|
*best_mv = mv;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// 4 sads in a single call if we are checking every location
|
||||||
if (c + 3 <= end_col) {
|
if (c + 3 <= end_col) {
|
||||||
unsigned int sads[4];
|
unsigned int sads[4];
|
||||||
const uint8_t *addrs[4];
|
const uint8_t *addrs[4];
|
||||||
for (i = 0; i < 4; ++i) {
|
for (i = 0; i < 4; ++i) {
|
||||||
const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
|
const MV mv = {fcenter_mv.row + r, fcenter_mv.col + c + i};
|
||||||
addrs[i] = get_buf_from_mv(in_what, &mv);
|
addrs[i] = get_buf_from_mv(in_what, &mv);
|
||||||
}
|
}
|
||||||
|
fn_ptr->sdx4df(what->buf, what->stride, addrs,
|
||||||
fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
|
in_what->stride, sads);
|
||||||
|
|
||||||
for (i = 0; i < 4; ++i) {
|
for (i = 0; i < 4; ++i) {
|
||||||
if (sads[i] < best_sad) {
|
if (sads[i] < best_sad) {
|
||||||
const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
|
const MV mv = {fcenter_mv.row + r, fcenter_mv.col + c + i};
|
||||||
const unsigned int sad = sads[i] +
|
const unsigned int sad = sads[i] +
|
||||||
mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
|
mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
|
||||||
if (sad < best_sad) {
|
if (sad < best_sad) {
|
||||||
best_sad = sad;
|
best_sad = sad;
|
||||||
*best_mv = mv;
|
*best_mv = mv;
|
||||||
@@ -1572,11 +1585,11 @@ int vp9_full_range_search_c(const MACROBLOCK *x,
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (i = 0; i < end_col - c; ++i) {
|
for (i = 0; i < end_col - c; ++i) {
|
||||||
const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
|
const MV mv = {fcenter_mv.row + r, fcenter_mv.col + c + i};
|
||||||
unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
|
unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
|
||||||
get_buf_from_mv(in_what, &mv), in_what->stride);
|
get_buf_from_mv(in_what, &mv), in_what->stride);
|
||||||
if (sad < best_sad) {
|
if (sad < best_sad) {
|
||||||
sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
|
sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
|
||||||
if (sad < best_sad) {
|
if (sad < best_sad) {
|
||||||
best_sad = sad;
|
best_sad = sad;
|
||||||
*best_mv = mv;
|
*best_mv = mv;
|
||||||
@@ -1586,6 +1599,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return best_sad;
|
return best_sad;
|
||||||
}
|
}
|
||||||
@@ -2011,6 +2025,70 @@ static int full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
return bestsme;
|
return bestsme;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define MIN_RANGE 7
|
||||||
|
#define MAX_RANGE 256
|
||||||
|
#define MIN_INTERVAL 1
|
||||||
|
// Runs a limited range exhaustive mesh search using a pattern set
|
||||||
|
// according to the encode speed profile.
|
||||||
|
static int full_pixel_exhaustive(VP9_COMP *cpi, MACROBLOCK *x,
|
||||||
|
MV *centre_mv_full, int sadpb, int *cost_list,
|
||||||
|
const vp9_variance_fn_ptr_t *fn_ptr,
|
||||||
|
const MV *ref_mv, MV *dst_mv) {
|
||||||
|
const SPEED_FEATURES *const sf = &cpi->sf;
|
||||||
|
MV temp_mv = {centre_mv_full->row, centre_mv_full->col};
|
||||||
|
MV f_ref_mv = {ref_mv->row >> 3, ref_mv->col >> 3};
|
||||||
|
int bestsme;
|
||||||
|
int i;
|
||||||
|
int interval = sf->mesh_patterns[0].interval;
|
||||||
|
int range = sf->mesh_patterns[0].range;
|
||||||
|
int baseline_interval_divisor;
|
||||||
|
|
||||||
|
// Keep track of number of exhaustive calls (this frame in this thread).
|
||||||
|
++(*x->ex_search_count_ptr);
|
||||||
|
|
||||||
|
// Trap illegal values for interval and range for this function.
|
||||||
|
if ((range < MIN_RANGE) || (range > MAX_RANGE) ||
|
||||||
|
(interval < MIN_INTERVAL) || (interval > range))
|
||||||
|
return INT_MAX;
|
||||||
|
|
||||||
|
baseline_interval_divisor = range / interval;
|
||||||
|
|
||||||
|
// Check size of proposed first range against magnitude of the centre
|
||||||
|
// value used as a starting point.
|
||||||
|
range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4);
|
||||||
|
range = VPXMIN(range, MAX_RANGE);
|
||||||
|
interval = VPXMAX(interval, range / baseline_interval_divisor);
|
||||||
|
|
||||||
|
// initial search
|
||||||
|
bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv, range,
|
||||||
|
interval, sadpb, fn_ptr, &temp_mv);
|
||||||
|
|
||||||
|
if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) {
|
||||||
|
// Progressive searches with range and step size decreasing each time
|
||||||
|
// till we reach a step size of 1. Then break out.
|
||||||
|
for (i = 1; i < MAX_MESH_STEP; ++i) {
|
||||||
|
// First pass with coarser step and longer range
|
||||||
|
bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv,
|
||||||
|
sf->mesh_patterns[i].range,
|
||||||
|
sf->mesh_patterns[i].interval,
|
||||||
|
sadpb, fn_ptr, &temp_mv);
|
||||||
|
|
||||||
|
if (sf->mesh_patterns[i].interval == 1)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bestsme < INT_MAX)
|
||||||
|
bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
|
||||||
|
*dst_mv = temp_mv;
|
||||||
|
|
||||||
|
// Return cost list.
|
||||||
|
if (cost_list) {
|
||||||
|
calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
|
||||||
|
}
|
||||||
|
return bestsme;
|
||||||
|
}
|
||||||
|
|
||||||
int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
|
int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
|
||||||
int sad_per_bit, int distance,
|
int sad_per_bit, int distance,
|
||||||
const vp9_variance_fn_ptr_t *fn_ptr,
|
const vp9_variance_fn_ptr_t *fn_ptr,
|
||||||
@@ -2324,6 +2402,18 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x,
|
|||||||
return best_sad;
|
return best_sad;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define MIN_EX_SEARCH_LIMIT 128
|
||||||
|
static int is_exhaustive_allowed(VP9_COMP *cpi, MACROBLOCK *x) {
|
||||||
|
const SPEED_FEATURES *const sf = &cpi->sf;
|
||||||
|
const int max_ex = VPXMAX(MIN_EX_SEARCH_LIMIT,
|
||||||
|
(*x->m_search_count_ptr * sf->max_exaustive_pct) / 100);
|
||||||
|
|
||||||
|
return sf->allow_exhaustive_searches &&
|
||||||
|
(sf->exhaustive_searches_thresh < INT_MAX) &&
|
||||||
|
(*x->ex_search_count_ptr <= max_ex) &&
|
||||||
|
!cpi->rc.is_src_frame_alt_ref;
|
||||||
|
}
|
||||||
|
|
||||||
int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x,
|
int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x,
|
||||||
BLOCK_SIZE bsize, MV *mvp_full,
|
BLOCK_SIZE bsize, MV *mvp_full,
|
||||||
int step_param, int error_per_bit,
|
int step_param, int error_per_bit,
|
||||||
@@ -2342,6 +2432,9 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
cost_list[4] = INT_MAX;
|
cost_list[4] = INT_MAX;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Keep track of number of searches (this frame in this thread).
|
||||||
|
++(*x->m_search_count_ptr);
|
||||||
|
|
||||||
switch (method) {
|
switch (method) {
|
||||||
case FAST_DIAMOND:
|
case FAST_DIAMOND:
|
||||||
var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
|
var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
|
||||||
@@ -2367,6 +2460,27 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
|
var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
|
||||||
MAX_MVSEARCH_STEPS - 1 - step_param,
|
MAX_MVSEARCH_STEPS - 1 - step_param,
|
||||||
1, cost_list, fn_ptr, ref_mv, tmp_mv);
|
1, cost_list, fn_ptr, ref_mv, tmp_mv);
|
||||||
|
|
||||||
|
// Should we allow a follow on exhaustive search?
|
||||||
|
if (is_exhaustive_allowed(cpi, x)) {
|
||||||
|
int64_t exhuastive_thr = sf->exhaustive_searches_thresh;
|
||||||
|
exhuastive_thr >>= 8 - (b_width_log2_lookup[bsize] +
|
||||||
|
b_height_log2_lookup[bsize]);
|
||||||
|
|
||||||
|
// Threshold variance for an exhaustive full search.
|
||||||
|
if (var > exhuastive_thr) {
|
||||||
|
int var_ex;
|
||||||
|
MV tmp_mv_ex;
|
||||||
|
var_ex = full_pixel_exhaustive(cpi, x, tmp_mv,
|
||||||
|
error_per_bit, cost_list, fn_ptr,
|
||||||
|
ref_mv, &tmp_mv_ex);
|
||||||
|
|
||||||
|
if (var_ex < var) {
|
||||||
|
var = var_ex;
|
||||||
|
*tmp_mv = tmp_mv_ex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(0 && "Invalid search method.");
|
assert(0 && "Invalid search method.");
|
||||||
|
@@ -1750,8 +1750,9 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
|
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
|
||||||
ENTROPY_CONTEXT t_above[2], t_left[2];
|
ENTROPY_CONTEXT t_above[2], t_left[2];
|
||||||
int subpelmv = 1, have_ref = 0;
|
int subpelmv = 1, have_ref = 0;
|
||||||
|
SPEED_FEATURES *const sf = &cpi->sf;
|
||||||
const int has_second_rf = has_second_ref(mbmi);
|
const int has_second_rf = has_second_ref(mbmi);
|
||||||
const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize];
|
const int inter_mode_mask = sf->inter_mode_mask[bsize];
|
||||||
MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
|
MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
|
||||||
|
|
||||||
vp9_zero(*bsi);
|
vp9_zero(*bsi);
|
||||||
@@ -1820,7 +1821,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
|
seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
|
||||||
MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
|
MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
|
||||||
int step_param = 0;
|
int step_param = 0;
|
||||||
int thissme, bestsme = INT_MAX;
|
int bestsme = INT_MAX;
|
||||||
int sadpb = x->sadperbit4;
|
int sadpb = x->sadperbit4;
|
||||||
MV mvp_full;
|
MV mvp_full;
|
||||||
int max_mv;
|
int max_mv;
|
||||||
@@ -1845,7 +1846,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
max_mv =
|
max_mv =
|
||||||
VPXMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
|
VPXMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
|
||||||
|
|
||||||
if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
|
if (sf->mv.auto_mv_step_size && cm->show_frame) {
|
||||||
// Take wtd average of the step_params based on the last frame's
|
// Take wtd average of the step_params based on the last frame's
|
||||||
// max mv magnitude and the best ref mvs of the current block for
|
// max mv magnitude and the best ref mvs of the current block for
|
||||||
// the given reference.
|
// the given reference.
|
||||||
@@ -1858,7 +1859,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
mvp_full.row = bsi->mvp.as_mv.row >> 3;
|
mvp_full.row = bsi->mvp.as_mv.row >> 3;
|
||||||
mvp_full.col = bsi->mvp.as_mv.col >> 3;
|
mvp_full.col = bsi->mvp.as_mv.col >> 3;
|
||||||
|
|
||||||
if (cpi->sf.adaptive_motion_search) {
|
if (sf->adaptive_motion_search) {
|
||||||
mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].row >> 3;
|
mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].row >> 3;
|
||||||
mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].col >> 3;
|
mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].col >> 3;
|
||||||
step_param = VPXMAX(step_param, 8);
|
step_param = VPXMAX(step_param, 8);
|
||||||
@@ -1871,31 +1872,10 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
|
|
||||||
bestsme = vp9_full_pixel_search(
|
bestsme = vp9_full_pixel_search(
|
||||||
cpi, x, bsize, &mvp_full, step_param, sadpb,
|
cpi, x, bsize, &mvp_full, step_param, sadpb,
|
||||||
cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
|
sf->mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
|
||||||
&bsi->ref_mv[0]->as_mv, new_mv,
|
&bsi->ref_mv[0]->as_mv, new_mv,
|
||||||
INT_MAX, 1);
|
INT_MAX, 1);
|
||||||
|
|
||||||
// Should we do a full search (best quality only)
|
|
||||||
if (cpi->oxcf.mode == BEST) {
|
|
||||||
int_mv *const best_mv = &mi->bmi[i].as_mv[0];
|
|
||||||
/* Check if mvp_full is within the range. */
|
|
||||||
clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
|
|
||||||
x->mv_row_min, x->mv_row_max);
|
|
||||||
thissme = cpi->full_search_sad(x, &mvp_full,
|
|
||||||
sadpb, 16, &cpi->fn_ptr[bsize],
|
|
||||||
&bsi->ref_mv[0]->as_mv,
|
|
||||||
&best_mv->as_mv);
|
|
||||||
cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
|
|
||||||
if (thissme < bestsme) {
|
|
||||||
bestsme = thissme;
|
|
||||||
*new_mv = best_mv->as_mv;
|
|
||||||
} else {
|
|
||||||
// The full search result is actually worse so re-instate the
|
|
||||||
// previous best vector
|
|
||||||
best_mv->as_mv = *new_mv;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bestsme < INT_MAX) {
|
if (bestsme < INT_MAX) {
|
||||||
int distortion;
|
int distortion;
|
||||||
cpi->find_fractional_mv_step(
|
cpi->find_fractional_mv_step(
|
||||||
@@ -1904,8 +1884,8 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
&bsi->ref_mv[0]->as_mv,
|
&bsi->ref_mv[0]->as_mv,
|
||||||
cm->allow_high_precision_mv,
|
cm->allow_high_precision_mv,
|
||||||
x->errorperbit, &cpi->fn_ptr[bsize],
|
x->errorperbit, &cpi->fn_ptr[bsize],
|
||||||
cpi->sf.mv.subpel_force_stop,
|
sf->mv.subpel_force_stop,
|
||||||
cpi->sf.mv.subpel_iters_per_step,
|
sf->mv.subpel_iters_per_step,
|
||||||
cond_cost_list(cpi, cost_list),
|
cond_cost_list(cpi, cost_list),
|
||||||
x->nmvjointcost, x->mvcost,
|
x->nmvjointcost, x->mvcost,
|
||||||
&distortion,
|
&distortion,
|
||||||
@@ -1916,7 +1896,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv;
|
seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cpi->sf.adaptive_motion_search)
|
if (sf->adaptive_motion_search)
|
||||||
x->pred_mv[mbmi->ref_frame[0]] = *new_mv;
|
x->pred_mv[mbmi->ref_frame[0]] = *new_mv;
|
||||||
|
|
||||||
// restore src pointers
|
// restore src pointers
|
||||||
@@ -1933,7 +1913,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
mbmi->interp_filter == EIGHTTAP) {
|
mbmi->interp_filter == EIGHTTAP) {
|
||||||
// adjust src pointers
|
// adjust src pointers
|
||||||
mi_buf_shift(x, i);
|
mi_buf_shift(x, i);
|
||||||
if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
|
if (sf->comp_inter_joint_search_thresh <= bsize) {
|
||||||
int rate_mv;
|
int rate_mv;
|
||||||
joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
|
joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
|
||||||
mi_row, mi_col, seg_mvs[i],
|
mi_row, mi_col, seg_mvs[i],
|
||||||
|
@@ -15,6 +15,22 @@
|
|||||||
#include "vp9/encoder/vp9_rdopt.h"
|
#include "vp9/encoder/vp9_rdopt.h"
|
||||||
#include "vpx_dsp/vpx_dsp_common.h"
|
#include "vpx_dsp/vpx_dsp_common.h"
|
||||||
|
|
||||||
|
// Mesh search patterns for various speed settings
|
||||||
|
static MESH_PATTERN best_quality_mesh_pattern[MAX_MESH_STEP] =
|
||||||
|
{{64, 4}, {28, 2}, {15, 1}, {7, 1}, {1, 1}, {1, 1}};
|
||||||
|
|
||||||
|
#define MAX_MESH_SPEED 5 // Max speed setting for mesh motion method
|
||||||
|
static MESH_PATTERN good_quality_mesh_patterns[MAX_MESH_SPEED + 1]
|
||||||
|
[MAX_MESH_STEP] =
|
||||||
|
{{{64, 8}, {28, 4}, {15, 1}, {7, 1}, {3, 1}, {2, 1}},
|
||||||
|
{{64, 8}, {28, 4}, {15, 1}, {7, 1}, {3, 1}, {2, 1}},
|
||||||
|
{{64, 8}, {14, 2}, {7, 1}, {7, 1}, {3, 1}, {2, 1}},
|
||||||
|
{{64, 16}, {24, 8}, {12, 4}, {7, 1}, {3, 1 }, {2, 1 }},
|
||||||
|
{{64, 16}, {24, 8}, {12, 4}, {7, 1}, {3, 1 }, {2, 1 }},
|
||||||
|
{{64, 16}, {24, 8}, {12, 4}, {7, 1}, {3, 1 }, {2, 1 }},
|
||||||
|
};
|
||||||
|
static unsigned char good_quality_max_mesh_pct[MAX_MESH_SPEED + 1] =
|
||||||
|
{50, 25, 15, 5, 1, 1};
|
||||||
|
|
||||||
// Intra only frames, golden frames (except alt ref overlays) and
|
// Intra only frames, golden frames (except alt ref overlays) and
|
||||||
// alt ref frames tend to be coded at a higher than ambient quality
|
// alt ref frames tend to be coded at a higher than ambient quality
|
||||||
@@ -259,6 +275,8 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
|
|||||||
sf->static_segmentation = 0;
|
sf->static_segmentation = 0;
|
||||||
sf->adaptive_rd_thresh = 1;
|
sf->adaptive_rd_thresh = 1;
|
||||||
sf->use_fast_coef_costing = 1;
|
sf->use_fast_coef_costing = 1;
|
||||||
|
sf->allow_exhaustive_searches = 0;
|
||||||
|
sf->exhaustive_searches_thresh = INT_MAX;
|
||||||
|
|
||||||
if (speed >= 1) {
|
if (speed >= 1) {
|
||||||
sf->use_square_partition_only = !frame_is_intra_only(cm);
|
sf->use_square_partition_only = !frame_is_intra_only(cm);
|
||||||
@@ -527,8 +545,36 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
|
|||||||
set_good_speed_feature(cpi, cm, sf, oxcf->speed);
|
set_good_speed_feature(cpi, cm, sf, oxcf->speed);
|
||||||
|
|
||||||
cpi->full_search_sad = vp9_full_search_sad;
|
cpi->full_search_sad = vp9_full_search_sad;
|
||||||
cpi->diamond_search_sad = oxcf->mode == BEST ? vp9_full_range_search
|
cpi->diamond_search_sad = vp9_diamond_search_sad;
|
||||||
: vp9_diamond_search_sad;
|
|
||||||
|
sf->allow_exhaustive_searches = 1;
|
||||||
|
if (oxcf->mode == BEST) {
|
||||||
|
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION)
|
||||||
|
sf->exhaustive_searches_thresh = (1 << 20);
|
||||||
|
else
|
||||||
|
sf->exhaustive_searches_thresh = (1 << 21);
|
||||||
|
sf->max_exaustive_pct = 100;
|
||||||
|
for (i = 0; i < MAX_MESH_STEP; ++i) {
|
||||||
|
sf->mesh_patterns[i].range = best_quality_mesh_pattern[i].range;
|
||||||
|
sf->mesh_patterns[i].interval = best_quality_mesh_pattern[i].interval;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
int speed = (oxcf->speed > MAX_MESH_SPEED) ? MAX_MESH_SPEED : oxcf->speed;
|
||||||
|
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION)
|
||||||
|
sf->exhaustive_searches_thresh = (1 << 22);
|
||||||
|
else
|
||||||
|
sf->exhaustive_searches_thresh = (1 << 23);
|
||||||
|
sf->max_exaustive_pct = good_quality_max_mesh_pct[speed];
|
||||||
|
if (speed > 0)
|
||||||
|
sf->exhaustive_searches_thresh = sf->exhaustive_searches_thresh << 1;
|
||||||
|
|
||||||
|
for (i = 0; i < MAX_MESH_STEP; ++i) {
|
||||||
|
sf->mesh_patterns[i].range =
|
||||||
|
good_quality_mesh_patterns[speed][i].range;
|
||||||
|
sf->mesh_patterns[i].interval =
|
||||||
|
good_quality_mesh_patterns[speed][i].interval;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Slow quant, dct and trellis not worthwhile for first pass
|
// Slow quant, dct and trellis not worthwhile for first pass
|
||||||
// so make sure they are always turned off.
|
// so make sure they are always turned off.
|
||||||
|
@@ -195,6 +195,13 @@ typedef struct MV_SPEED_FEATURES {
|
|||||||
int fullpel_search_step_param;
|
int fullpel_search_step_param;
|
||||||
} MV_SPEED_FEATURES;
|
} MV_SPEED_FEATURES;
|
||||||
|
|
||||||
|
#define MAX_MESH_STEP 6
|
||||||
|
|
||||||
|
typedef struct MESH_PATTERN {
|
||||||
|
int range;
|
||||||
|
int interval;
|
||||||
|
} MESH_PATTERN;
|
||||||
|
|
||||||
typedef struct SPEED_FEATURES {
|
typedef struct SPEED_FEATURES {
|
||||||
MV_SPEED_FEATURES mv;
|
MV_SPEED_FEATURES mv;
|
||||||
|
|
||||||
@@ -299,6 +306,18 @@ typedef struct SPEED_FEATURES {
|
|||||||
// point for this motion search and limits the search range around it.
|
// point for this motion search and limits the search range around it.
|
||||||
int adaptive_motion_search;
|
int adaptive_motion_search;
|
||||||
|
|
||||||
|
// Flag for allowing some use of exhaustive searches;
|
||||||
|
int allow_exhaustive_searches;
|
||||||
|
|
||||||
|
// Threshold for allowing exhaustive motion search.
|
||||||
|
int exhaustive_searches_thresh;
|
||||||
|
|
||||||
|
// Maximum number of exhaustive searches for a frame, expressed as a
// percentage of the motion search count.
|
||||||
|
int max_exaustive_pct;
|
||||||
|
|
||||||
|
// Pattern to be used for any exhaustive mesh searches.
|
||||||
|
MESH_PATTERN mesh_patterns[MAX_MESH_STEP];
|
||||||
|
|
||||||
int schedule_mode_search;
|
int schedule_mode_search;
|
||||||
|
|
||||||
// Allows sub 8x8 modes to use the prediction filter that was determined
|
// Allows sub 8x8 modes to use the prediction filter that was determined
|
||||||
|
Reference in New Issue
Block a user