Merge "Changes to exhaustive motion search."
This commit is contained in:
commit
85aea16f17
@ -314,9 +314,6 @@ $vp9_full_search_sad_sse4_1=vp9_full_search_sadx8;
|
|||||||
add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
|
add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
|
||||||
specialize qw/vp9_diamond_search_sad avx/;
|
specialize qw/vp9_diamond_search_sad avx/;
|
||||||
|
|
||||||
add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
|
|
||||||
specialize qw/vp9_full_range_search/;
|
|
||||||
|
|
||||||
add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
|
add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
|
||||||
specialize qw/vp9_temporal_filter_apply sse2 msa/;
|
specialize qw/vp9_temporal_filter_apply sse2 msa/;
|
||||||
|
|
||||||
|
@ -71,6 +71,8 @@ struct macroblock {
|
|||||||
int rddiv;
|
int rddiv;
|
||||||
int rdmult;
|
int rdmult;
|
||||||
int mb_energy;
|
int mb_energy;
|
||||||
|
int * m_search_count_ptr;
|
||||||
|
int * ex_search_count_ptr;
|
||||||
|
|
||||||
// These are set to their default values at the beginning, and then adjusted
|
// These are set to their default values at the beginning, and then adjusted
|
||||||
// further in the encoding process.
|
// further in the encoding process.
|
||||||
|
@ -3839,6 +3839,10 @@ void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td,
|
|||||||
TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
|
TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
|
||||||
int mi_row;
|
int mi_row;
|
||||||
|
|
||||||
|
// Set up pointers to per thread motion search counters.
|
||||||
|
td->mb.m_search_count_ptr = &td->rd_counts.m_search_count;
|
||||||
|
td->mb.ex_search_count_ptr = &td->rd_counts.ex_search_count;
|
||||||
|
|
||||||
for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
|
for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
|
||||||
mi_row += MI_BLOCK_SIZE) {
|
mi_row += MI_BLOCK_SIZE) {
|
||||||
if (cpi->sf.use_nonrd_pick_mode)
|
if (cpi->sf.use_nonrd_pick_mode)
|
||||||
@ -3895,6 +3899,9 @@ static void encode_frame_internal(VP9_COMP *cpi) {
|
|||||||
vp9_zero(rdc->coef_counts);
|
vp9_zero(rdc->coef_counts);
|
||||||
vp9_zero(rdc->comp_pred_diff);
|
vp9_zero(rdc->comp_pred_diff);
|
||||||
vp9_zero(rdc->filter_diff);
|
vp9_zero(rdc->filter_diff);
|
||||||
|
rdc->m_search_count = 0; // Count of motion search hits.
|
||||||
|
rdc->ex_search_count = 0; // Exhaustive mesh search hits.
|
||||||
|
|
||||||
|
|
||||||
xd->lossless = cm->base_qindex == 0 &&
|
xd->lossless = cm->base_qindex == 0 &&
|
||||||
cm->y_dc_delta_q == 0 &&
|
cm->y_dc_delta_q == 0 &&
|
||||||
|
@ -2995,7 +2995,7 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
|
|||||||
recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
|
recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
|
||||||
|
|
||||||
if (cpi->twopass.total_left_stats.coded_error != 0.0)
|
if (cpi->twopass.total_left_stats.coded_error != 0.0)
|
||||||
fprintf(f, "%10u %dx%d %d %d %10d %10d %10d %10d"
|
fprintf(f, "%10u %dx%d %10d %10d %d %d %10d %10d %10d %10d"
|
||||||
"%10"PRId64" %10"PRId64" %5d %5d %10"PRId64" "
|
"%10"PRId64" %10"PRId64" %5d %5d %10"PRId64" "
|
||||||
"%10"PRId64" %10"PRId64" %10d "
|
"%10"PRId64" %10"PRId64" %10d "
|
||||||
"%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf"
|
"%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf"
|
||||||
@ -3004,6 +3004,8 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
|
|||||||
"%10lf %8u %10"PRId64" %10d %10d %10d\n",
|
"%10lf %8u %10"PRId64" %10d %10d %10d\n",
|
||||||
cpi->common.current_video_frame,
|
cpi->common.current_video_frame,
|
||||||
cm->width, cm->height,
|
cm->width, cm->height,
|
||||||
|
cpi->td.rd_counts.m_search_count,
|
||||||
|
cpi->td.rd_counts.ex_search_count,
|
||||||
cpi->rc.source_alt_ref_pending,
|
cpi->rc.source_alt_ref_pending,
|
||||||
cpi->rc.source_alt_ref_active,
|
cpi->rc.source_alt_ref_active,
|
||||||
cpi->rc.this_frame_target,
|
cpi->rc.this_frame_target,
|
||||||
|
@ -260,6 +260,8 @@ typedef struct RD_COUNTS {
|
|||||||
vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES];
|
vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES];
|
||||||
int64_t comp_pred_diff[REFERENCE_MODES];
|
int64_t comp_pred_diff[REFERENCE_MODES];
|
||||||
int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS];
|
int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS];
|
||||||
|
int m_search_count;
|
||||||
|
int ex_search_count;
|
||||||
} RD_COUNTS;
|
} RD_COUNTS;
|
||||||
|
|
||||||
typedef struct ThreadData {
|
typedef struct ThreadData {
|
||||||
|
@ -30,6 +30,10 @@ static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) {
|
|||||||
for (n = 0; n < ENTROPY_TOKENS; n++)
|
for (n = 0; n < ENTROPY_TOKENS; n++)
|
||||||
td->rd_counts.coef_counts[i][j][k][l][m][n] +=
|
td->rd_counts.coef_counts[i][j][k][l][m][n] +=
|
||||||
td_t->rd_counts.coef_counts[i][j][k][l][m][n];
|
td_t->rd_counts.coef_counts[i][j][k][l][m][n];
|
||||||
|
|
||||||
|
// Counts of all motion searches and exhaustive mesh searches.
|
||||||
|
td->rd_counts.m_search_count += td_t->rd_counts.m_search_count;
|
||||||
|
td->rd_counts.ex_search_count += td_t->rd_counts.ex_search_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) {
|
static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) {
|
||||||
|
@ -1517,69 +1517,83 @@ static int fast_dia_search(const MACROBLOCK *x,
|
|||||||
|
|
||||||
#undef CHECK_BETTER
|
#undef CHECK_BETTER
|
||||||
|
|
||||||
int vp9_full_range_search_c(const MACROBLOCK *x,
|
// Exhaustive motion search around a given centre position with a given
|
||||||
const search_site_config *cfg,
|
// step size.
|
||||||
MV *ref_mv, MV *best_mv,
|
static int exhuastive_mesh_search(const MACROBLOCK *x,
|
||||||
int search_param, int sad_per_bit, int *num00,
|
MV *ref_mv, MV *best_mv,
|
||||||
const vp9_variance_fn_ptr_t *fn_ptr,
|
int range, int step, int sad_per_bit,
|
||||||
const MV *center_mv) {
|
const vp9_variance_fn_ptr_t *fn_ptr,
|
||||||
|
const MV *center_mv) {
|
||||||
const MACROBLOCKD *const xd = &x->e_mbd;
|
const MACROBLOCKD *const xd = &x->e_mbd;
|
||||||
const struct buf_2d *const what = &x->plane[0].src;
|
const struct buf_2d *const what = &x->plane[0].src;
|
||||||
const struct buf_2d *const in_what = &xd->plane[0].pre[0];
|
const struct buf_2d *const in_what = &xd->plane[0].pre[0];
|
||||||
const int range = 64;
|
MV fcenter_mv = {center_mv->row, center_mv->col};
|
||||||
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
|
|
||||||
unsigned int best_sad = INT_MAX;
|
unsigned int best_sad = INT_MAX;
|
||||||
int r, c, i;
|
int r, c, i;
|
||||||
int start_col, end_col, start_row, end_row;
|
int start_col, end_col, start_row, end_row;
|
||||||
|
int col_step = (step > 1) ? step : 4;
|
||||||
|
|
||||||
// The cfg and search_param parameters are not used in this search variant
|
assert(step >= 1);
|
||||||
(void)cfg;
|
|
||||||
(void)search_param;
|
|
||||||
|
|
||||||
clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
|
clamp_mv(&fcenter_mv, x->mv_col_min, x->mv_col_max,
|
||||||
*best_mv = *ref_mv;
|
x->mv_row_min, x->mv_row_max);
|
||||||
*num00 = 11;
|
*best_mv = fcenter_mv;
|
||||||
best_sad = fn_ptr->sdf(what->buf, what->stride,
|
best_sad = fn_ptr->sdf(what->buf, what->stride,
|
||||||
get_buf_from_mv(in_what, ref_mv), in_what->stride) +
|
get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) +
|
||||||
mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
|
mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit);
|
||||||
start_row = VPXMAX(-range, x->mv_row_min - ref_mv->row);
|
start_row = VPXMAX(-range, x->mv_row_min - fcenter_mv.row);
|
||||||
start_col = VPXMAX(-range, x->mv_col_min - ref_mv->col);
|
start_col = VPXMAX(-range, x->mv_col_min - fcenter_mv.col);
|
||||||
end_row = VPXMIN(range, x->mv_row_max - ref_mv->row);
|
end_row = VPXMIN(range, x->mv_row_max - fcenter_mv.row);
|
||||||
end_col = VPXMIN(range, x->mv_col_max - ref_mv->col);
|
end_col = VPXMIN(range, x->mv_col_max - fcenter_mv.col);
|
||||||
|
|
||||||
for (r = start_row; r <= end_row; ++r) {
|
for (r = start_row; r <= end_row; r += step) {
|
||||||
for (c = start_col; c <= end_col; c += 4) {
|
for (c = start_col; c <= end_col; c += col_step) {
|
||||||
if (c + 3 <= end_col) {
|
// Step > 1 means we are not checking every location in this pass.
|
||||||
unsigned int sads[4];
|
if (step > 1) {
|
||||||
const uint8_t *addrs[4];
|
const MV mv = {fcenter_mv.row + r, fcenter_mv.col + c};
|
||||||
for (i = 0; i < 4; ++i) {
|
unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
|
||||||
const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
|
get_buf_from_mv(in_what, &mv), in_what->stride);
|
||||||
addrs[i] = get_buf_from_mv(in_what, &mv);
|
if (sad < best_sad) {
|
||||||
}
|
sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
|
||||||
|
if (sad < best_sad) {
|
||||||
fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
|
best_sad = sad;
|
||||||
|
*best_mv = mv;
|
||||||
for (i = 0; i < 4; ++i) {
|
|
||||||
if (sads[i] < best_sad) {
|
|
||||||
const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
|
|
||||||
const unsigned int sad = sads[i] +
|
|
||||||
mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
|
|
||||||
if (sad < best_sad) {
|
|
||||||
best_sad = sad;
|
|
||||||
*best_mv = mv;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (i = 0; i < end_col - c; ++i) {
|
// 4 sads in a single call if we are checking every location
|
||||||
const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
|
if (c + 3 <= end_col) {
|
||||||
unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
|
unsigned int sads[4];
|
||||||
get_buf_from_mv(in_what, &mv), in_what->stride);
|
const uint8_t *addrs[4];
|
||||||
if (sad < best_sad) {
|
for (i = 0; i < 4; ++i) {
|
||||||
sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
|
const MV mv = {fcenter_mv.row + r, fcenter_mv.col + c + i};
|
||||||
|
addrs[i] = get_buf_from_mv(in_what, &mv);
|
||||||
|
}
|
||||||
|
fn_ptr->sdx4df(what->buf, what->stride, addrs,
|
||||||
|
in_what->stride, sads);
|
||||||
|
|
||||||
|
for (i = 0; i < 4; ++i) {
|
||||||
|
if (sads[i] < best_sad) {
|
||||||
|
const MV mv = {fcenter_mv.row + r, fcenter_mv.col + c + i};
|
||||||
|
const unsigned int sad = sads[i] +
|
||||||
|
mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
|
||||||
|
if (sad < best_sad) {
|
||||||
|
best_sad = sad;
|
||||||
|
*best_mv = mv;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (i = 0; i < end_col - c; ++i) {
|
||||||
|
const MV mv = {fcenter_mv.row + r, fcenter_mv.col + c + i};
|
||||||
|
unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
|
||||||
|
get_buf_from_mv(in_what, &mv), in_what->stride);
|
||||||
if (sad < best_sad) {
|
if (sad < best_sad) {
|
||||||
best_sad = sad;
|
sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
|
||||||
*best_mv = mv;
|
if (sad < best_sad) {
|
||||||
|
best_sad = sad;
|
||||||
|
*best_mv = mv;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2011,6 +2025,70 @@ static int full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
return bestsme;
|
return bestsme;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define MIN_RANGE 7
|
||||||
|
#define MAX_RANGE 256
|
||||||
|
#define MIN_INTERVAL 1
|
||||||
|
// Runs a limited-range exhaustive mesh search using a pattern set
|
||||||
|
// according to the encode speed profile.
|
||||||
|
static int full_pixel_exhaustive(VP9_COMP *cpi, MACROBLOCK *x,
|
||||||
|
MV *centre_mv_full, int sadpb, int *cost_list,
|
||||||
|
const vp9_variance_fn_ptr_t *fn_ptr,
|
||||||
|
const MV *ref_mv, MV *dst_mv) {
|
||||||
|
const SPEED_FEATURES *const sf = &cpi->sf;
|
||||||
|
MV temp_mv = {centre_mv_full->row, centre_mv_full->col};
|
||||||
|
MV f_ref_mv = {ref_mv->row >> 3, ref_mv->col >> 3};
|
||||||
|
int bestsme;
|
||||||
|
int i;
|
||||||
|
int interval = sf->mesh_patterns[0].interval;
|
||||||
|
int range = sf->mesh_patterns[0].range;
|
||||||
|
int baseline_interval_divisor;
|
||||||
|
|
||||||
|
// Keep track of number of exhaustive calls (this frame in this thread).
|
||||||
|
++(*x->ex_search_count_ptr);
|
||||||
|
|
||||||
|
// Trap illegal values for interval and range for this function.
|
||||||
|
if ((range < MIN_RANGE) || (range > MAX_RANGE) ||
|
||||||
|
(interval < MIN_INTERVAL) || (interval > range))
|
||||||
|
return INT_MAX;
|
||||||
|
|
||||||
|
baseline_interval_divisor = range / interval;
|
||||||
|
|
||||||
|
// Check size of proposed first range against magnitude of the centre
|
||||||
|
// value used as a starting point.
|
||||||
|
range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4);
|
||||||
|
range = VPXMIN(range, MAX_RANGE);
|
||||||
|
interval = VPXMAX(interval, range / baseline_interval_divisor);
|
||||||
|
|
||||||
|
// initial search
|
||||||
|
bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv, range,
|
||||||
|
interval, sadpb, fn_ptr, &temp_mv);
|
||||||
|
|
||||||
|
if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) {
|
||||||
|
// Progressive searches with range and step size decreasing each time
|
||||||
|
// till we reach a step size of 1. Then break out.
|
||||||
|
for (i = 1; i < MAX_MESH_STEP; ++i) {
|
||||||
|
// First pass with coarser step and longer range
|
||||||
|
bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv,
|
||||||
|
sf->mesh_patterns[i].range,
|
||||||
|
sf->mesh_patterns[i].interval,
|
||||||
|
sadpb, fn_ptr, &temp_mv);
|
||||||
|
|
||||||
|
if (sf->mesh_patterns[i].interval == 1)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bestsme < INT_MAX)
|
||||||
|
bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
|
||||||
|
*dst_mv = temp_mv;
|
||||||
|
|
||||||
|
// Return cost list.
|
||||||
|
if (cost_list) {
|
||||||
|
calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
|
||||||
|
}
|
||||||
|
return bestsme;
|
||||||
|
}
|
||||||
|
|
||||||
int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
|
int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
|
||||||
int sad_per_bit, int distance,
|
int sad_per_bit, int distance,
|
||||||
const vp9_variance_fn_ptr_t *fn_ptr,
|
const vp9_variance_fn_ptr_t *fn_ptr,
|
||||||
@ -2324,6 +2402,18 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x,
|
|||||||
return best_sad;
|
return best_sad;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define MIN_EX_SEARCH_LIMIT 128
|
||||||
|
static int is_exhaustive_allowed(VP9_COMP *cpi, MACROBLOCK *x) {
|
||||||
|
const SPEED_FEATURES *const sf = &cpi->sf;
|
||||||
|
const int max_ex = VPXMAX(MIN_EX_SEARCH_LIMIT,
|
||||||
|
(*x->m_search_count_ptr * sf->max_exaustive_pct) / 100);
|
||||||
|
|
||||||
|
return sf->allow_exhaustive_searches &&
|
||||||
|
(sf->exhaustive_searches_thresh < INT_MAX) &&
|
||||||
|
(*x->ex_search_count_ptr <= max_ex) &&
|
||||||
|
!cpi->rc.is_src_frame_alt_ref;
|
||||||
|
}
|
||||||
|
|
||||||
int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x,
|
int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x,
|
||||||
BLOCK_SIZE bsize, MV *mvp_full,
|
BLOCK_SIZE bsize, MV *mvp_full,
|
||||||
int step_param, int error_per_bit,
|
int step_param, int error_per_bit,
|
||||||
@ -2342,6 +2432,9 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
cost_list[4] = INT_MAX;
|
cost_list[4] = INT_MAX;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Keep track of number of searches (this frame in this thread).
|
||||||
|
++(*x->m_search_count_ptr);
|
||||||
|
|
||||||
switch (method) {
|
switch (method) {
|
||||||
case FAST_DIAMOND:
|
case FAST_DIAMOND:
|
||||||
var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
|
var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
|
||||||
@ -2367,6 +2460,27 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
|
var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
|
||||||
MAX_MVSEARCH_STEPS - 1 - step_param,
|
MAX_MVSEARCH_STEPS - 1 - step_param,
|
||||||
1, cost_list, fn_ptr, ref_mv, tmp_mv);
|
1, cost_list, fn_ptr, ref_mv, tmp_mv);
|
||||||
|
|
||||||
|
// Should we allow a follow on exhaustive search?
|
||||||
|
if (is_exhaustive_allowed(cpi, x)) {
|
||||||
|
int64_t exhuastive_thr = sf->exhaustive_searches_thresh;
|
||||||
|
exhuastive_thr >>= 8 - (b_width_log2_lookup[bsize] +
|
||||||
|
b_height_log2_lookup[bsize]);
|
||||||
|
|
||||||
|
// Threshold variance for an exhaustive full search.
|
||||||
|
if (var > exhuastive_thr) {
|
||||||
|
int var_ex;
|
||||||
|
MV tmp_mv_ex;
|
||||||
|
var_ex = full_pixel_exhaustive(cpi, x, tmp_mv,
|
||||||
|
error_per_bit, cost_list, fn_ptr,
|
||||||
|
ref_mv, &tmp_mv_ex);
|
||||||
|
|
||||||
|
if (var_ex < var) {
|
||||||
|
var = var_ex;
|
||||||
|
*tmp_mv = tmp_mv_ex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(0 && "Invalid search method.");
|
assert(0 && "Invalid search method.");
|
||||||
|
@ -1750,8 +1750,9 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
|
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
|
||||||
ENTROPY_CONTEXT t_above[2], t_left[2];
|
ENTROPY_CONTEXT t_above[2], t_left[2];
|
||||||
int subpelmv = 1, have_ref = 0;
|
int subpelmv = 1, have_ref = 0;
|
||||||
|
SPEED_FEATURES *const sf = &cpi->sf;
|
||||||
const int has_second_rf = has_second_ref(mbmi);
|
const int has_second_rf = has_second_ref(mbmi);
|
||||||
const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize];
|
const int inter_mode_mask = sf->inter_mode_mask[bsize];
|
||||||
MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
|
MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
|
||||||
|
|
||||||
vp9_zero(*bsi);
|
vp9_zero(*bsi);
|
||||||
@ -1820,7 +1821,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
|
seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
|
||||||
MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
|
MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
|
||||||
int step_param = 0;
|
int step_param = 0;
|
||||||
int thissme, bestsme = INT_MAX;
|
int bestsme = INT_MAX;
|
||||||
int sadpb = x->sadperbit4;
|
int sadpb = x->sadperbit4;
|
||||||
MV mvp_full;
|
MV mvp_full;
|
||||||
int max_mv;
|
int max_mv;
|
||||||
@ -1845,7 +1846,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
max_mv =
|
max_mv =
|
||||||
VPXMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
|
VPXMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
|
||||||
|
|
||||||
if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
|
if (sf->mv.auto_mv_step_size && cm->show_frame) {
|
||||||
// Take wtd average of the step_params based on the last frame's
|
// Take wtd average of the step_params based on the last frame's
|
||||||
// max mv magnitude and the best ref mvs of the current block for
|
// max mv magnitude and the best ref mvs of the current block for
|
||||||
// the given reference.
|
// the given reference.
|
||||||
@ -1858,7 +1859,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
mvp_full.row = bsi->mvp.as_mv.row >> 3;
|
mvp_full.row = bsi->mvp.as_mv.row >> 3;
|
||||||
mvp_full.col = bsi->mvp.as_mv.col >> 3;
|
mvp_full.col = bsi->mvp.as_mv.col >> 3;
|
||||||
|
|
||||||
if (cpi->sf.adaptive_motion_search) {
|
if (sf->adaptive_motion_search) {
|
||||||
mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].row >> 3;
|
mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].row >> 3;
|
||||||
mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].col >> 3;
|
mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].col >> 3;
|
||||||
step_param = VPXMAX(step_param, 8);
|
step_param = VPXMAX(step_param, 8);
|
||||||
@ -1871,31 +1872,10 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
|
|
||||||
bestsme = vp9_full_pixel_search(
|
bestsme = vp9_full_pixel_search(
|
||||||
cpi, x, bsize, &mvp_full, step_param, sadpb,
|
cpi, x, bsize, &mvp_full, step_param, sadpb,
|
||||||
cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
|
sf->mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
|
||||||
&bsi->ref_mv[0]->as_mv, new_mv,
|
&bsi->ref_mv[0]->as_mv, new_mv,
|
||||||
INT_MAX, 1);
|
INT_MAX, 1);
|
||||||
|
|
||||||
// Should we do a full search (best quality only)
|
|
||||||
if (cpi->oxcf.mode == BEST) {
|
|
||||||
int_mv *const best_mv = &mi->bmi[i].as_mv[0];
|
|
||||||
/* Check if mvp_full is within the range. */
|
|
||||||
clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
|
|
||||||
x->mv_row_min, x->mv_row_max);
|
|
||||||
thissme = cpi->full_search_sad(x, &mvp_full,
|
|
||||||
sadpb, 16, &cpi->fn_ptr[bsize],
|
|
||||||
&bsi->ref_mv[0]->as_mv,
|
|
||||||
&best_mv->as_mv);
|
|
||||||
cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
|
|
||||||
if (thissme < bestsme) {
|
|
||||||
bestsme = thissme;
|
|
||||||
*new_mv = best_mv->as_mv;
|
|
||||||
} else {
|
|
||||||
// The full search result is actually worse so re-instate the
|
|
||||||
// previous best vector
|
|
||||||
best_mv->as_mv = *new_mv;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bestsme < INT_MAX) {
|
if (bestsme < INT_MAX) {
|
||||||
int distortion;
|
int distortion;
|
||||||
cpi->find_fractional_mv_step(
|
cpi->find_fractional_mv_step(
|
||||||
@ -1904,8 +1884,8 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
&bsi->ref_mv[0]->as_mv,
|
&bsi->ref_mv[0]->as_mv,
|
||||||
cm->allow_high_precision_mv,
|
cm->allow_high_precision_mv,
|
||||||
x->errorperbit, &cpi->fn_ptr[bsize],
|
x->errorperbit, &cpi->fn_ptr[bsize],
|
||||||
cpi->sf.mv.subpel_force_stop,
|
sf->mv.subpel_force_stop,
|
||||||
cpi->sf.mv.subpel_iters_per_step,
|
sf->mv.subpel_iters_per_step,
|
||||||
cond_cost_list(cpi, cost_list),
|
cond_cost_list(cpi, cost_list),
|
||||||
x->nmvjointcost, x->mvcost,
|
x->nmvjointcost, x->mvcost,
|
||||||
&distortion,
|
&distortion,
|
||||||
@ -1916,7 +1896,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv;
|
seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cpi->sf.adaptive_motion_search)
|
if (sf->adaptive_motion_search)
|
||||||
x->pred_mv[mbmi->ref_frame[0]] = *new_mv;
|
x->pred_mv[mbmi->ref_frame[0]] = *new_mv;
|
||||||
|
|
||||||
// restore src pointers
|
// restore src pointers
|
||||||
@ -1933,7 +1913,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
mbmi->interp_filter == EIGHTTAP) {
|
mbmi->interp_filter == EIGHTTAP) {
|
||||||
// adjust src pointers
|
// adjust src pointers
|
||||||
mi_buf_shift(x, i);
|
mi_buf_shift(x, i);
|
||||||
if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
|
if (sf->comp_inter_joint_search_thresh <= bsize) {
|
||||||
int rate_mv;
|
int rate_mv;
|
||||||
joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
|
joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
|
||||||
mi_row, mi_col, seg_mvs[i],
|
mi_row, mi_col, seg_mvs[i],
|
||||||
|
@ -15,6 +15,22 @@
|
|||||||
#include "vp9/encoder/vp9_rdopt.h"
|
#include "vp9/encoder/vp9_rdopt.h"
|
||||||
#include "vpx_dsp/vpx_dsp_common.h"
|
#include "vpx_dsp/vpx_dsp_common.h"
|
||||||
|
|
||||||
|
// Mesh search patterns for various speed settings
|
||||||
|
static MESH_PATTERN best_quality_mesh_pattern[MAX_MESH_STEP] =
|
||||||
|
{{64, 4}, {28, 2}, {15, 1}, {7, 1}, {1, 1}, {1, 1}};
|
||||||
|
|
||||||
|
#define MAX_MESH_SPEED 5 // Max speed setting for mesh motion method
|
||||||
|
static MESH_PATTERN good_quality_mesh_patterns[MAX_MESH_SPEED + 1]
|
||||||
|
[MAX_MESH_STEP] =
|
||||||
|
{{{64, 8}, {28, 4}, {15, 1}, {7, 1}, {3, 1}, {2, 1}},
|
||||||
|
{{64, 8}, {28, 4}, {15, 1}, {7, 1}, {3, 1}, {2, 1}},
|
||||||
|
{{64, 8}, {14, 2}, {7, 1}, {7, 1}, {3, 1}, {2, 1}},
|
||||||
|
{{64, 16}, {24, 8}, {12, 4}, {7, 1}, {3, 1 }, {2, 1 }},
|
||||||
|
{{64, 16}, {24, 8}, {12, 4}, {7, 1}, {3, 1 }, {2, 1 }},
|
||||||
|
{{64, 16}, {24, 8}, {12, 4}, {7, 1}, {3, 1 }, {2, 1 }},
|
||||||
|
};
|
||||||
|
static unsigned char good_quality_max_mesh_pct[MAX_MESH_SPEED + 1] =
|
||||||
|
{50, 25, 15, 5, 1, 1};
|
||||||
|
|
||||||
// Intra only frames, golden frames (except alt ref overlays) and
|
// Intra only frames, golden frames (except alt ref overlays) and
|
||||||
// alt ref frames tend to be coded at a higher than ambient quality
|
// alt ref frames tend to be coded at a higher than ambient quality
|
||||||
@ -259,6 +275,8 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
|
|||||||
sf->static_segmentation = 0;
|
sf->static_segmentation = 0;
|
||||||
sf->adaptive_rd_thresh = 1;
|
sf->adaptive_rd_thresh = 1;
|
||||||
sf->use_fast_coef_costing = 1;
|
sf->use_fast_coef_costing = 1;
|
||||||
|
sf->allow_exhaustive_searches = 0;
|
||||||
|
sf->exhaustive_searches_thresh = INT_MAX;
|
||||||
|
|
||||||
if (speed >= 1) {
|
if (speed >= 1) {
|
||||||
sf->use_square_partition_only = !frame_is_intra_only(cm);
|
sf->use_square_partition_only = !frame_is_intra_only(cm);
|
||||||
@ -527,8 +545,36 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
|
|||||||
set_good_speed_feature(cpi, cm, sf, oxcf->speed);
|
set_good_speed_feature(cpi, cm, sf, oxcf->speed);
|
||||||
|
|
||||||
cpi->full_search_sad = vp9_full_search_sad;
|
cpi->full_search_sad = vp9_full_search_sad;
|
||||||
cpi->diamond_search_sad = oxcf->mode == BEST ? vp9_full_range_search
|
cpi->diamond_search_sad = vp9_diamond_search_sad;
|
||||||
: vp9_diamond_search_sad;
|
|
||||||
|
sf->allow_exhaustive_searches = 1;
|
||||||
|
if (oxcf->mode == BEST) {
|
||||||
|
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION)
|
||||||
|
sf->exhaustive_searches_thresh = (1 << 20);
|
||||||
|
else
|
||||||
|
sf->exhaustive_searches_thresh = (1 << 21);
|
||||||
|
sf->max_exaustive_pct = 100;
|
||||||
|
for (i = 0; i < MAX_MESH_STEP; ++i) {
|
||||||
|
sf->mesh_patterns[i].range = best_quality_mesh_pattern[i].range;
|
||||||
|
sf->mesh_patterns[i].interval = best_quality_mesh_pattern[i].interval;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
int speed = (oxcf->speed > MAX_MESH_SPEED) ? MAX_MESH_SPEED : oxcf->speed;
|
||||||
|
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION)
|
||||||
|
sf->exhaustive_searches_thresh = (1 << 22);
|
||||||
|
else
|
||||||
|
sf->exhaustive_searches_thresh = (1 << 23);
|
||||||
|
sf->max_exaustive_pct = good_quality_max_mesh_pct[speed];
|
||||||
|
if (speed > 0)
|
||||||
|
sf->exhaustive_searches_thresh = sf->exhaustive_searches_thresh << 1;
|
||||||
|
|
||||||
|
for (i = 0; i < MAX_MESH_STEP; ++i) {
|
||||||
|
sf->mesh_patterns[i].range =
|
||||||
|
good_quality_mesh_patterns[speed][i].range;
|
||||||
|
sf->mesh_patterns[i].interval =
|
||||||
|
good_quality_mesh_patterns[speed][i].interval;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Slow quant, dct and trellis not worthwhile for first pass
|
// Slow quant, dct and trellis not worthwhile for first pass
|
||||||
// so make sure they are always turned off.
|
// so make sure they are always turned off.
|
||||||
|
@ -195,6 +195,13 @@ typedef struct MV_SPEED_FEATURES {
|
|||||||
int fullpel_search_step_param;
|
int fullpel_search_step_param;
|
||||||
} MV_SPEED_FEATURES;
|
} MV_SPEED_FEATURES;
|
||||||
|
|
||||||
|
#define MAX_MESH_STEP 6
|
||||||
|
|
||||||
|
typedef struct MESH_PATTERN {
|
||||||
|
int range;
|
||||||
|
int interval;
|
||||||
|
} MESH_PATTERN;
|
||||||
|
|
||||||
typedef struct SPEED_FEATURES {
|
typedef struct SPEED_FEATURES {
|
||||||
MV_SPEED_FEATURES mv;
|
MV_SPEED_FEATURES mv;
|
||||||
|
|
||||||
@ -299,6 +306,18 @@ typedef struct SPEED_FEATURES {
|
|||||||
// point for this motion search and limits the search range around it.
|
// point for this motion search and limits the search range around it.
|
||||||
int adaptive_motion_search;
|
int adaptive_motion_search;
|
||||||
|
|
||||||
|
// Flag for allowing some use of exhaustive searches.
|
||||||
|
int allow_exhaustive_searches;
|
||||||
|
|
||||||
|
// Threshold for allowing exhaustive motion search.
|
||||||
|
int exhaustive_searches_thresh;
|
||||||
|
|
||||||
|
// Maximum number of exhaustive searches for a frame, as a percentage of all motion searches.
|
||||||
|
int max_exaustive_pct;
|
||||||
|
|
||||||
|
// Pattern to be used for any exhaustive mesh searches.
|
||||||
|
MESH_PATTERN mesh_patterns[MAX_MESH_STEP];
|
||||||
|
|
||||||
int schedule_mode_search;
|
int schedule_mode_search;
|
||||||
|
|
||||||
// Allows sub 8x8 modes to use the prediction filter that was determined
|
// Allows sub 8x8 modes to use the prediction filter that was determined
|
||||||
|
Loading…
x
Reference in New Issue
Block a user