From d78dbff09a1b3166fdd5be07b5227179e6e2dfcb Mon Sep 17 00:00:00 2001 From: Deb Mukherjee Date: Tue, 7 Oct 2014 02:48:08 -0700 Subject: [PATCH] Subpel search cleanups and enhancements - Some fixes to surface fit. - The pattern search and diamond search functions now return costs computed with the variance function, rather than SAD, in the cost list. Only the vp9_pattern_search_sad function, which is used by the bigdia search, still uses SAD for the integer 1-away costs. - Deploys SUBPEL_TREE_PRUNED_MORE for speed 4+. Results: derf [Speed 3]: About +0.036% in coding efficiency without any discernible speed loss. derf [Speed 4]: About 2-3% faster at -0.199% loss in coding efficiency. derf [Speed 5]: About 3-4% faster at -0.149% loss in coding efficiency. Change-Id: I8462f94f6adb46966ca964f2bd0400977357fd63 --- vp9/encoder/vp9_encoder.h | 4 +- vp9/encoder/vp9_mbgraph.c | 6 +- vp9/encoder/vp9_mcomp.c | 325 +++++++++++++++--------------- vp9/encoder/vp9_mcomp.h | 8 +- vp9/encoder/vp9_pickmode.c | 6 +- vp9/encoder/vp9_rdopt.c | 14 +- vp9/encoder/vp9_speed_features.c | 6 +- vp9/encoder/vp9_speed_features.h | 6 +- vp9/encoder/vp9_temporal_filter.c | 6 +- 9 files changed, 194 insertions(+), 187 deletions(-) diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 49df5b0f1..e677b7625 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -531,8 +531,8 @@ static INLINE int get_chessboard_index(const int frame_index) { return frame_index & 0x1; } -static INLINE int *cond_sad_list(const struct VP9_COMP *cpi, int *sad_list) { - return cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? sad_list : NULL; +static INLINE int *cond_cost_list(const struct VP9_COMP *cpi, int *cost_list) { + return cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? 
cost_list : NULL; } #ifdef __cplusplus diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c index 42981d816..bd04c56a4 100644 --- a/vp9/encoder/vp9_mbgraph.c +++ b/vp9/encoder/vp9_mbgraph.c @@ -34,7 +34,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, const int tmp_row_min = x->mv_row_min; const int tmp_row_max = x->mv_row_max; MV ref_full; - int sad_list[5]; + int cost_list[5]; // Further step/diamond searches as necessary int step_param = mv_sf->reduce_first_step_size; @@ -47,7 +47,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, /*cpi->sf.search_method == HEX*/ vp9_hex_search(x, &ref_full, step_param, x->errorperbit, 0, - cond_sad_list(cpi, sad_list), + cond_cost_list(cpi, cost_list), &v_fn_ptr, 0, ref_mv, dst_mv); // Try sub-pixel MC @@ -58,7 +58,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, cpi->find_fractional_mv_step( x, dst_mv, ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit, &v_fn_ptr, 0, mv_sf->subpel_iters_per_step, - cond_sad_list(cpi, sad_list), + cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0); } diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 258b45994..f69f15510 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -327,11 +327,11 @@ static INLINE int divide_and_round(const int n, const int d) { return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d); } -static INLINE int is_sad_list_wellbehaved(int *sad_list) { - return sad_list[0] >= sad_list[1] && - sad_list[0] >= sad_list[2] && - sad_list[0] >= sad_list[3] && - sad_list[0] >= sad_list[4]; +static INLINE int is_cost_list_wellbehaved(int *cost_list) { + return cost_list[0] < cost_list[1] && + cost_list[0] < cost_list[2] && + cost_list[0] < cost_list[3] && + cost_list[0] < cost_list[4]; } // Returns surface minima estimate at given precision in 1/2^n bits. 
@@ -342,27 +342,28 @@ static INLINE int is_sad_list_wellbehaved(int *sad_list) { // x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0), // y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0). // The code below is an integerized version of that. -static void get_cost_surf_min(int *sad_list, int *ir, int *ic, +static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) { - *ic = divide_and_round((sad_list[1] - sad_list[3]) * (1 << (bits - 1)), - (sad_list[1] - 2 * sad_list[0] + sad_list[3])); - *ir = divide_and_round((sad_list[4] - sad_list[2]) * (1 << (bits - 1)), - (sad_list[4] - 2 * sad_list[0] + sad_list[2])); + *ic = divide_and_round((cost_list[1] - cost_list[3]) * (1 << (bits - 1)), + (cost_list[1] - 2 * cost_list[0] + cost_list[3])); + *ir = divide_and_round((cost_list[4] - cost_list[2]) * (1 << (bits - 1)), + (cost_list[4] - 2 * cost_list[0] + cost_list[2])); } -int vp9_find_best_sub_pixel_surface_fit(const MACROBLOCK *x, - MV *bestmv, const MV *ref_mv, - int allow_hp, - int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, - int forced_stop, - int iters_per_step, - int *sad_list, - int *mvjcost, int *mvcost[2], - int *distortion, - unsigned int *sse1, - const uint8_t *second_pred, - int w, int h) { +int vp9_find_best_sub_pixel_tree_pruned_evenmore( + const MACROBLOCK *x, + MV *bestmv, const MV *ref_mv, + int allow_hp, + int error_per_bit, + const vp9_variance_fn_ptr_t *vfp, + int forced_stop, + int iters_per_step, + int *cost_list, + int *mvjcost, int *mvcost[2], + int *distortion, + unsigned int *sse1, + const uint8_t *second_pred, + int w, int h) { SETUP_SUBPEL_SEARCH; SETUP_CENTER_ERROR; (void) halfiters; @@ -373,16 +374,46 @@ int vp9_find_best_sub_pixel_surface_fit(const MACROBLOCK *x, (void) forced_stop; (void) hstep; - if (sad_list && - sad_list[0] != INT_MAX && sad_list[1] != INT_MAX && - sad_list[2] != INT_MAX && sad_list[3] != INT_MAX && - sad_list[4] != INT_MAX && - is_sad_list_wellbehaved(sad_list)) { + if (cost_list && + cost_list[0] != INT_MAX && cost_list[1] 
!= INT_MAX && + cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && + cost_list[4] != INT_MAX && + is_cost_list_wellbehaved(cost_list)) { int ir, ic; unsigned int minpt; - get_cost_surf_min(sad_list, &ir, &ic, 3); + get_cost_surf_min(cost_list, &ir, &ic, 2); if (ir != 0 || ic != 0) { - CHECK_BETTER(minpt, tr + ir, tc + ic); + CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic); + } + } else { + FIRST_LEVEL_CHECKS; + if (halfiters > 1) { + SECOND_LEVEL_CHECKS; + } + + tr = br; + tc = bc; + + // Each subsequent iteration checks at least one point in common with + // the last iteration could be 2 ( if diag selected) 1/4 pel + // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only + if (forced_stop != 2) { + hstep >>= 1; + FIRST_LEVEL_CHECKS; + if (quarteriters > 1) { + SECOND_LEVEL_CHECKS; + } + } + } + + tr = br; + tc = bc; + + if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { + hstep >>= 1; + FIRST_LEVEL_CHECKS; + if (eighthiters > 1) { + SECOND_LEVEL_CHECKS; } } @@ -403,7 +434,7 @@ int vp9_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, - int *sad_list, + int *cost_list, int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1, @@ -411,14 +442,14 @@ int vp9_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x, int w, int h) { SETUP_SUBPEL_SEARCH; SETUP_CENTER_ERROR; - if (sad_list && - sad_list[0] != INT_MAX && sad_list[1] != INT_MAX && - sad_list[2] != INT_MAX && sad_list[3] != INT_MAX && - sad_list[4] != INT_MAX && - is_sad_list_wellbehaved(sad_list)) { + if (cost_list && + cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && + cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && + cost_list[4] != INT_MAX && + is_cost_list_wellbehaved(cost_list)) { unsigned int minpt; int ir, ic; - get_cost_surf_min(sad_list, &ir, &ic, 1); + get_cost_surf_min(cost_list, &ir, &ic, 1); if (ir != 0 || ic != 0) { CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep); } @@ 
-429,31 +460,28 @@ int vp9_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x, } } - tr = br; - tc = bc; - // Each subsequent iteration checks at least one point in common with // the last iteration could be 2 ( if diag selected) 1/4 pel // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only if (forced_stop != 2) { + tr = br; + tc = bc; hstep >>= 1; FIRST_LEVEL_CHECKS; if (quarteriters > 1) { SECOND_LEVEL_CHECKS; } - tr = br; - tc = bc; } if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { + tr = br; + tc = bc; hstep >>= 1; FIRST_LEVEL_CHECKS; if (eighthiters > 1) { SECOND_LEVEL_CHECKS; } - tr = br; - tc = bc; } // These lines insure static analysis doesn't warn that // tr and tc aren't used after the above point. @@ -477,7 +505,7 @@ int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, - int *sad_list, + int *cost_list, int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1, @@ -485,13 +513,13 @@ int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x, int w, int h) { SETUP_SUBPEL_SEARCH; SETUP_CENTER_ERROR; - if (sad_list && - sad_list[0] != INT_MAX && sad_list[1] != INT_MAX && - sad_list[2] != INT_MAX && sad_list[3] != INT_MAX && - sad_list[4] != INT_MAX) { + if (cost_list && + cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && + cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && + cost_list[4] != INT_MAX) { unsigned int left, right, up, down, diag; - whichdir = (sad_list[1] < sad_list[3] ? 0 : 1) + - (sad_list[2] < sad_list[4] ? 0 : 2); + whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) + + (cost_list[2] < cost_list[4] ? 
0 : 2); switch (whichdir) { case 0: CHECK_BETTER(left, tr, tc - hstep); @@ -569,7 +597,7 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, - int *sad_list, + int *cost_list, int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1, @@ -577,7 +605,7 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, int w, int h) { SETUP_SUBPEL_SEARCH; SETUP_CENTER_ERROR; - (void) sad_list; // to silence compiler warning + (void) cost_list; // to silence compiler warning // Each subsequent iteration checks at least one point in // common with the last iteration could be 2 ( if diag selected) @@ -661,12 +689,12 @@ static INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) { #define PATTERN_CANDIDATES_REF 3 // number of refinement candidates // Calculate and return a sad+mvcost list around an integer best pel. -static INLINE void calc_int_sad_cost_list(MACROBLOCK *x, - const MV *ref_mv, - int sadpb, - const vp9_variance_fn_ptr_t *fn_ptr, - const MV *best_mv, - int *cost_list) { +static INLINE void calc_int_cost_list(const MACROBLOCK *x, + const MV *ref_mv, + int sadpb, + const vp9_variance_fn_ptr_t *fn_ptr, + const MV *best_mv, + int *cost_list) { static const MV neighbors[4] = {{0, -1}, {1, 0}, {0, 1}, {-1, 0}}; const struct buf_2d *const what = &x->plane[0].src; const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0]; @@ -675,21 +703,24 @@ static INLINE void calc_int_sad_cost_list(MACROBLOCK *x, int bc = best_mv->col; MV this_mv; int i; + unsigned int sse; this_mv.row = br; this_mv.col = bc; - cost_list[0] = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &this_mv), - in_what->stride) + + cost_list[0] = fn_ptr->vf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, &sse) + mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb); if (check_bounds(x, br, bc, 1)) { for (i = 0; i < 4; i++) { const MV this_mv = {br + neighbors[i].row, bc + 
neighbors[i].col}; - cost_list[i + 1] = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &this_mv), - in_what->stride) + - mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb); + cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, &sse) + + // mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb); + mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, x->mvcost, + x->errorperbit); } } else { for (i = 0; i < 4; i++) { @@ -698,10 +729,12 @@ static INLINE void calc_int_sad_cost_list(MACROBLOCK *x, if (!is_mv_in(x, &this_mv)) cost_list[i + 1] = INT_MAX; else - cost_list[i + 1] = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &this_mv), - in_what->stride) + - mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb); + cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, &sse) + + // mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb); + mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, x->mvcost, + x->errorperbit); } } } @@ -716,7 +749,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, int search_param, int sad_per_bit, int do_init_search, - int *sad_list, + int *cost_list, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, @@ -868,40 +901,14 @@ static int vp9_pattern_search(const MACROBLOCK *x, } // Returns the one-away integer pel sad values around the best as follows: - // sad_list[0]: sad at the best integer pel - // sad_list[1]: sad at delta {0, -1} (left) from the best integer pel - // sad_list[2]: sad at delta { 1, 0} (bottom) from the best integer pel - // sad_list[3]: sad at delta { 0, 1} (right) from the best integer pel - // sad_list[4]: sad at delta {-1, 0} (top) from the best integer pel - if (sad_list) { - static const MV neighbors[4] = {{0, -1}, {1, 0}, {0, 1}, {-1, 0}}; - sad_list[0] = bestsad; - if (check_bounds(x, br, bc, 1)) { - for (i = 0; i < 4; i++) { - const MV this_mv = {br + neighbors[i].row, - 
bc + neighbors[i].col}; - sad_list[i + 1] = vfp->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &this_mv), - in_what->stride) + - (use_mvcost ? - mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit) : - 0); - } - } else { - for (i = 0; i < 4; i++) { - const MV this_mv = {br + neighbors[i].row, - bc + neighbors[i].col}; - if (!is_mv_in(x, &this_mv)) - sad_list[i + 1] = INT_MAX; - else - sad_list[i + 1] = vfp->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &this_mv), - in_what->stride) + - (use_mvcost ? - mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit) : - 0); - } - } + // cost_list[0]: cost at the best integer pel + // cost_list[1]: cost at delta {0, -1} (left) from the best integer pel + // cost_list[2]: cost at delta { 1, 0} (bottom) from the best integer pel + // cost_list[3]: cost at delta { 0, 1} (right) from the best integer pel + // cost_list[4]: cost at delta {-1, 0} (top) from the best integer pel + if (cost_list) { + const MV best_mv = { br, bc }; + calc_int_cost_list(x, &fcenter_mv, sad_per_bit, vfp, &best_mv, cost_list); } best_mv->row = br; best_mv->col = bc; @@ -909,7 +916,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, } // A specialized function where the smallest scale search candidates -// are 4 1-away neighbors, and sad_list is non-null +// are 4 1-away neighbors, and cost_list is non-null // TODO(debargha): Merge this function with the one above. Also remove // use_mvcost option since it is always 1, to save unnecessary branches. 
static int vp9_pattern_search_sad(const MACROBLOCK *x, @@ -917,7 +924,7 @@ static int vp9_pattern_search_sad(const MACROBLOCK *x, int search_param, int sad_per_bit, int do_init_search, - int *sad_list, + int *cost_list, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, @@ -942,8 +949,8 @@ static int vp9_pattern_search_sad(const MACROBLOCK *x, clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); br = ref_mv->row; bc = ref_mv->col; - if (sad_list != NULL) { - sad_list[0] = sad_list[1] = sad_list[2] = sad_list[3] = sad_list[4] = + if (cost_list != NULL) { + cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX; } @@ -997,7 +1004,7 @@ static int vp9_pattern_search_sad(const MACROBLOCK *x, // If the center point is still the best, just skip this and move to // the refinement step. if (best_init_s != -1) { - int do_sad = (num_candidates[0] == 4 && sad_list != NULL); + int do_sad = (num_candidates[0] == 4 && cost_list != NULL); int best_site = -1; s = best_init_s; @@ -1071,15 +1078,15 @@ static int vp9_pattern_search_sad(const MACROBLOCK *x, } while (best_site != -1); } - // Note: If we enter the if below, then sad_list must be non-NULL. + // Note: If we enter the if below, then cost_list must be non-NULL. 
if (s == 0) { - sad_list[0] = bestsad; + cost_list[0] = bestsad; if (!do_init_search || s != best_init_s) { if (check_bounds(x, br, bc, 1 << s)) { for (i = 0; i < num_candidates[s]; i++) { const MV this_mv = {br + candidates[s][i].row, bc + candidates[s][i].col}; - sad_list[i + 1] = + cost_list[i + 1] = thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride); @@ -1091,7 +1098,7 @@ static int vp9_pattern_search_sad(const MACROBLOCK *x, bc + candidates[s][i].col}; if (!is_mv_in(x, &this_mv)) continue; - sad_list[i + 1] = + cost_list[i + 1] = thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride); @@ -1111,15 +1118,15 @@ static int vp9_pattern_search_sad(const MACROBLOCK *x, next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; next_chkpts_indices[1] = k; next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; - sad_list[1] = sad_list[2] = sad_list[3] = sad_list[4] = INT_MAX; - sad_list[((k + 2) % 4) + 1] = sad_list[0]; - sad_list[0] = bestsad; + cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX; + cost_list[((k + 2) % 4) + 1] = cost_list[0]; + cost_list[0] = bestsad; if (check_bounds(x, br, bc, 1 << s)) { for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row, bc + candidates[s][next_chkpts_indices[i]].col}; - sad_list[next_chkpts_indices[i] + 1] = + cost_list[next_chkpts_indices[i] + 1] = thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride); @@ -1130,10 +1137,10 @@ static int vp9_pattern_search_sad(const MACROBLOCK *x, const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row, bc + candidates[s][next_chkpts_indices[i]].col}; if (!is_mv_in(x, &this_mv)) { - sad_list[next_chkpts_indices[i] + 1] = INT_MAX; + cost_list[next_chkpts_indices[i] + 1] = INT_MAX; continue; } - sad_list[next_chkpts_indices[i] + 1] = + 
cost_list[next_chkpts_indices[i] + 1] = thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride); @@ -1151,20 +1158,20 @@ static int vp9_pattern_search_sad(const MACROBLOCK *x, } // Returns the one-away integer pel sad values around the best as follows: - // sad_list[0]: sad at the best integer pel - // sad_list[1]: sad at delta {0, -1} (left) from the best integer pel - // sad_list[2]: sad at delta { 1, 0} (bottom) from the best integer pel - // sad_list[3]: sad at delta { 0, 1} (right) from the best integer pel - // sad_list[4]: sad at delta {-1, 0} (top) from the best integer pel - if (sad_list) { + // cost_list[0]: sad at the best integer pel + // cost_list[1]: sad at delta {0, -1} (left) from the best integer pel + // cost_list[2]: sad at delta { 1, 0} (bottom) from the best integer pel + // cost_list[3]: sad at delta { 0, 1} (right) from the best integer pel + // cost_list[4]: sad at delta {-1, 0} (top) from the best integer pel + if (cost_list) { static const MV neighbors[4] = {{0, -1}, {1, 0}, {0, 1}, {-1, 0}}; - if (sad_list[0] == INT_MAX) { - sad_list[0] = bestsad; + if (cost_list[0] == INT_MAX) { + cost_list[0] = bestsad; if (check_bounds(x, br, bc, 1)) { for (i = 0; i < 4; i++) { - const MV this_mv = {br + neighbors[i].row, - bc + neighbors[i].col}; - sad_list[i + 1] = vfp->sdf(what->buf, what->stride, + const MV this_mv = { br + neighbors[i].row, + bc + neighbors[i].col }; + cost_list[i + 1] = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride); } @@ -1173,9 +1180,9 @@ static int vp9_pattern_search_sad(const MACROBLOCK *x, const MV this_mv = {br + neighbors[i].row, bc + neighbors[i].col}; if (!is_mv_in(x, &this_mv)) - sad_list[i + 1] = INT_MAX; + cost_list[i + 1] = INT_MAX; else - sad_list[i + 1] = vfp->sdf(what->buf, what->stride, + cost_list[i + 1] = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride); } @@ -1185,8 +1192,8 @@ static int 
vp9_pattern_search_sad(const MACROBLOCK *x, for (i = 0; i < 4; i++) { const MV this_mv = {br + neighbors[i].row, bc + neighbors[i].col}; - if (sad_list[i + 1] != INT_MAX) { - sad_list[i + 1] += + if (cost_list[i + 1] != INT_MAX) { + cost_list[i + 1] += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); } } @@ -1236,7 +1243,7 @@ int vp9_hex_search(const MACROBLOCK *x, int search_param, int sad_per_bit, int do_init_search, - int *sad_list, + int *cost_list, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv) { @@ -1261,7 +1268,7 @@ int vp9_hex_search(const MACROBLOCK *x, { -1024, 0}}, }; return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, - do_init_search, sad_list, vfp, use_mvcost, + do_init_search, cost_list, vfp, use_mvcost, center_mv, best_mv, hex_num_candidates, hex_candidates); } @@ -1271,7 +1278,7 @@ int vp9_bigdia_search(const MACROBLOCK *x, int search_param, int sad_per_bit, int do_init_search, - int *sad_list, + int *cost_list, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, @@ -1303,7 +1310,7 @@ int vp9_bigdia_search(const MACROBLOCK *x, {-512, 512}, {-1024, 0}}, }; return vp9_pattern_search_sad(x, ref_mv, search_param, sad_per_bit, - do_init_search, sad_list, vfp, use_mvcost, + do_init_search, cost_list, vfp, use_mvcost, center_mv, best_mv, bigdia_num_candidates, bigdia_candidates); } @@ -1313,7 +1320,7 @@ int vp9_square_search(const MACROBLOCK *x, int search_param, int sad_per_bit, int do_init_search, - int *sad_list, + int *cost_list, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, @@ -1345,7 +1352,7 @@ int vp9_square_search(const MACROBLOCK *x, {0, 1024}, {-1024, 1024}, {-1024, 0}}, }; return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, - do_init_search, sad_list, vfp, use_mvcost, + do_init_search, cost_list, vfp, use_mvcost, center_mv, best_mv, square_num_candidates, square_candidates); } @@ -1355,13 +1362,13 @@ int vp9_fast_hex_search(const 
MACROBLOCK *x, int search_param, int sad_per_bit, int do_init_search, // must be zero for fast_hex - int *sad_list, + int *cost_list, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv) { return vp9_hex_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param), - sad_per_bit, do_init_search, sad_list, vfp, use_mvcost, + sad_per_bit, do_init_search, cost_list, vfp, use_mvcost, center_mv, best_mv); } @@ -1370,13 +1377,13 @@ int vp9_fast_dia_search(const MACROBLOCK *x, int search_param, int sad_per_bit, int do_init_search, - int *sad_list, + int *cost_list, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv) { return vp9_bigdia_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param), - sad_per_bit, do_init_search, sad_list, vfp, + sad_per_bit, do_init_search, cost_list, vfp, use_mvcost, center_mv, best_mv); } @@ -1659,7 +1666,7 @@ int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, // Return cost list. if (cost_list) { - calc_int_sad_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list); + calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list); } return bestsme; } @@ -1980,46 +1987,46 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, MV *mvp_full, int step_param, int error_per_bit, - int *sad_list, + int *cost_list, const MV *ref_mv, MV *tmp_mv, int var_max, int rd) { const SPEED_FEATURES *const sf = &cpi->sf; const SEARCH_METHODS method = sf->mv.search_method; vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; int var = 0; - if (sad_list) { - sad_list[0] = INT_MAX; - sad_list[1] = INT_MAX; - sad_list[2] = INT_MAX; - sad_list[3] = INT_MAX; - sad_list[4] = INT_MAX; + if (cost_list) { + cost_list[0] = INT_MAX; + cost_list[1] = INT_MAX; + cost_list[2] = INT_MAX; + cost_list[3] = INT_MAX; + cost_list[4] = INT_MAX; } switch (method) { case FAST_DIAMOND: var = vp9_fast_dia_search(x, mvp_full, step_param, 
error_per_bit, 0, - sad_list, fn_ptr, 1, ref_mv, tmp_mv); + cost_list, fn_ptr, 1, ref_mv, tmp_mv); break; case FAST_HEX: var = vp9_fast_hex_search(x, mvp_full, step_param, error_per_bit, 0, - sad_list, fn_ptr, 1, ref_mv, tmp_mv); + cost_list, fn_ptr, 1, ref_mv, tmp_mv); break; case HEX: var = vp9_hex_search(x, mvp_full, step_param, error_per_bit, 1, - sad_list, fn_ptr, 1, ref_mv, tmp_mv); + cost_list, fn_ptr, 1, ref_mv, tmp_mv); break; case SQUARE: var = vp9_square_search(x, mvp_full, step_param, error_per_bit, 1, - sad_list, fn_ptr, 1, ref_mv, tmp_mv); + cost_list, fn_ptr, 1, ref_mv, tmp_mv); break; case BIGDIA: var = vp9_bigdia_search(x, mvp_full, step_param, error_per_bit, 1, - sad_list, fn_ptr, 1, ref_mv, tmp_mv); + cost_list, fn_ptr, 1, ref_mv, tmp_mv); break; case NSTEP: var = vp9_full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit, MAX_MVSEARCH_STEPS - 1 - step_param, - 1, sad_list, fn_ptr, ref_mv, tmp_mv); + 1, cost_list, fn_ptr, ref_mv, tmp_mv); break; default: assert(!"Invalid search method."); diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index 3156cb21e..9ddca250c 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h @@ -80,7 +80,7 @@ typedef int (integer_mv_pattern_search_fn) ( int search_param, int error_per_bit, int do_init_search, - int *sad_list, + int *cost_list, const vp9_variance_fn_ptr_t *vf, int use_mvcost, const MV *center_mv, @@ -100,7 +100,7 @@ typedef int (fractional_mv_step_fp) ( const vp9_variance_fn_ptr_t *vfp, int forced_stop, // 0 - full, 1 - qtr only, 2 - half only int iters_per_step, - int *sad_list, + int *cost_list, int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1, const uint8_t *second_pred, @@ -109,7 +109,7 @@ typedef int (fractional_mv_step_fp) ( extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree; extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned; extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned_more; -extern fractional_mv_step_fp 
vp9_find_best_sub_pixel_surface_fit; +extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned_evenmore; typedef int (*vp9_full_search_fn_t)(const MACROBLOCK *x, const MV *ref_mv, int sad_per_bit, @@ -142,7 +142,7 @@ struct VP9_COMP; int vp9_full_pixel_search(struct VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, MV *mvp_full, int step_param, int error_per_bit, - int *sad_list, + int *cost_list, const MV *ref_mv, MV *tmp_mv, int var_max, int rd); diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 428767a44..a498f8205 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -132,7 +132,7 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x, const int tmp_row_min = x->mv_row_min; const int tmp_row_max = x->mv_row_max; int rv = 0; - int sad_list[5]; + int cost_list[5]; const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, ref); if (cpi->common.show_frame && @@ -160,7 +160,7 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x, mvp_full.row >>= 3; vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb, - cond_sad_list(cpi, sad_list), + cond_cost_list(cpi, cost_list), &ref_mv, &tmp_mv->as_mv, INT_MAX, 0); x->mv_col_min = tmp_col_min; @@ -187,7 +187,7 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step, - cond_sad_list(cpi, sad_list), + cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0); x->pred_mv[ref] = tmp_mv->as_mv; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index e984225e9..ddd9ed576 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1605,7 +1605,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, int sadpb = x->sadperbit4; MV mvp_full; int max_mv; - int sad_list[5]; + int cost_list[5]; /* Is the best so far sufficiently good that we cant justify doing * 
and new motion search. */ @@ -1651,7 +1651,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, bestsme = vp9_full_pixel_search( cpi, x, bsize, &mvp_full, step_param, sadpb, - cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? sad_list : NULL, + cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL, &bsi->ref_mv[0]->as_mv, new_mv, INT_MAX, 1); @@ -1665,7 +1665,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, sadpb, 16, &cpi->fn_ptr[bsize], &bsi->ref_mv[0]->as_mv, &best_mv->as_mv); - sad_list[1] = sad_list[2] = sad_list[3] = sad_list[4] = INT_MAX; + cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX; if (thissme < bestsme) { bestsme = thissme; *new_mv = best_mv->as_mv; @@ -1686,7 +1686,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step, - cond_sad_list(cpi, sad_list), + cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost, &distortion, &x->pred_sse[mbmi->ref_frame[0]], @@ -2036,7 +2036,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int tmp_col_max = x->mv_col_max; int tmp_row_min = x->mv_row_min; int tmp_row_max = x->mv_row_max; - int sad_list[5]; + int cost_list[5]; const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, ref); @@ -2108,7 +2108,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, mvp_full.row >>= 3; bestsme = vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb, - cond_sad_list(cpi, sad_list), + cond_cost_list(cpi, cost_list), &ref_mv, &tmp_mv->as_mv, INT_MAX, 1); x->mv_col_min = tmp_col_min; @@ -2124,7 +2124,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step, - cond_sad_list(cpi, sad_list), + cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 
0, 0); } diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 062da09a0..79a0c62bc 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -124,7 +124,7 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, sf->tx_size_search_method = USE_LARGESTALL; sf->disable_split_mask = DISABLE_ALL_SPLIT; sf->mv.search_method = BIGDIA; - sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED; + sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_MORE; sf->adaptive_rd_thresh = 4; sf->mode_search_skip_flags |= FLAG_EARLY_TERMINATE; sf->disable_filter_search_var_thresh = 200; @@ -425,8 +425,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) { cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned; } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_MORE) { cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned_more; - } else if (sf->mv.subpel_search_method == SUBPEL_SURFACE_FIT) { - cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_surface_fit; + } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_EVENMORE) { + cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned_evenmore; } cpi->mb.optimize = sf->optimize_coefficients == 1 && oxcf->pass != 1; diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index e71a47b35..ffd62f0ee 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -78,9 +78,9 @@ typedef enum { typedef enum { SUBPEL_TREE = 0, - SUBPEL_TREE_PRUNED = 1, - SUBPEL_TREE_PRUNED_MORE = 2, - SUBPEL_SURFACE_FIT = 3, + SUBPEL_TREE_PRUNED = 1, // Prunes 1/2-pel searches + SUBPEL_TREE_PRUNED_MORE = 2, // Prunes 1/2-pel searches more aggressively + SUBPEL_TREE_PRUNED_EVENMORE = 3, // Prunes 1/2- and 1/4-pel searches // Other methods to come } SUBPEL_SEARCH_METHODS; diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index 2d594dd09..a067b2503 100644 --- 
a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -221,7 +221,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, int bestsme = INT_MAX; int distortion; unsigned int sse; - int sad_list[5]; + int cost_list[5]; MV best_ref_mv1 = {0, 0}; MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ @@ -245,7 +245,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, // Ignore mv costing by sending NULL pointer instead of cost arrays vp9_hex_search(x, &best_ref_mv1_full, step_param, sadpb, 1, - cond_sad_list(cpi, sad_list), + cond_cost_list(cpi, cost_list), &cpi->fn_ptr[BLOCK_16X16], 0, &best_ref_mv1, ref_mv); // Ignore mv costing by sending NULL pointer instead of cost array @@ -255,7 +255,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], 0, mv_sf->subpel_iters_per_step, - cond_sad_list(cpi, sad_list), + cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0);