From 83c76118eb7129f6865a2aa28251aac68785b324 Mon Sep 17 00:00:00 2001 From: Deb Mukherjee Date: Tue, 9 Sep 2014 20:07:11 -0700 Subject: [PATCH] Use bigdia search with pruned subpel search Improves function to return sad of integer pels by reusing integer pels already visited in the smallest scale. Turns on BIGDIA search for speed 4. Also, turns on the first version of the pruned subpel search at this speed. derf: -0.32% (speed 4) Speed seems to improve by at least 5% but subject to verification. Change-Id: Iaec8eaffd61d6237ac029e6a2a1b0a88b2a35271 --- vp9/encoder/vp9_mcomp.c | 334 +++++++++++++++++++++++++++++-- vp9/encoder/vp9_speed_features.c | 2 + 2 files changed, 317 insertions(+), 19 deletions(-) diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index d6f6b2563..a25dc61aa 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -319,25 +319,25 @@ int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x, (sad_list[2] < sad_list[4] ? 0 : 2); switch (whichdir) { case 0: - CHECK_BETTER(left, tr, tc - hstep); - CHECK_BETTER(up, tr - hstep, tc); - CHECK_BETTER(diag, tr - hstep, tc - hstep); - break; - case 1: - CHECK_BETTER(right, tr, tc + hstep); - CHECK_BETTER(up, tr - hstep, tc); - CHECK_BETTER(diag, tr - hstep, tc + hstep); - break; - case 2: CHECK_BETTER(left, tr, tc - hstep); CHECK_BETTER(down, tr + hstep, tc); CHECK_BETTER(diag, tr + hstep, tc - hstep); break; - case 3: + case 1: CHECK_BETTER(right, tr, tc + hstep); CHECK_BETTER(down, tr + hstep, tc); CHECK_BETTER(diag, tr + hstep, tc + hstep); break; + case 2: + CHECK_BETTER(left, tr, tc - hstep); + CHECK_BETTER(up, tr - hstep, tc); + CHECK_BETTER(diag, tr - hstep, tc - hstep); + break; + case 3: + CHECK_BETTER(right, tr, tc + hstep); + CHECK_BETTER(up, tr - hstep, tc); + CHECK_BETTER(diag, tr - hstep, tc + hstep); + break; } } else { FIRST_LEVEL_CHECKS; @@ -648,11 +648,11 @@ static int vp9_pattern_search(const MACROBLOCK *x, // Returns the one-away integer pel sad values around the best as follows: // sad_list[0]: sad at the best integer pel // sad_list[1]: sad at delta {0, -1} (left) from the best integer pel - // sad_list[2]: sad at delta {-1, 0} (top) from the best integer pel + // sad_list[2]: sad at delta { 1, 0} (bottom) from the best integer pel // sad_list[3]: sad at delta { 0, 1} (right) from the best integer pel - // sad_list[4]: sad at delta { 1, 0} (bottom) from the best integer pel + // sad_list[4]: sad at delta {-1, 0} (top) from the best integer pel if (sad_list) { - static const MV neighbors[4] = {{0, -1}, {-1, 0}, {0, 1}, {1, 0}}; + static const MV neighbors[4] = {{0, -1}, {1, 0}, {0, 1}, {-1, 0}}; sad_list[0] = bestsad; if (check_bounds(x, br, bc, 1)) { for (i = 0; i < 4; i++) { @@ -660,7 +660,10 @@ static int vp9_pattern_search(const MACROBLOCK *x, bc + neighbors[i].col}; sad_list[i + 1] = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride); + in_what->stride) + + (use_mvcost ? + mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit) : + 0); } } else { for (i = 0; i < 4; i++) { @@ -669,9 +672,302 @@ static int vp9_pattern_search(const MACROBLOCK *x, if (!is_mv_in(x, &this_mv)) sad_list[i + 1] = INT_MAX; else + sad_list[i + 1] = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride) + + (use_mvcost ? + mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit) : + 0); + } + } + } + best_mv->row = br; + best_mv->col = bc; + return bestsad; +} + +// A specialized function where the smallest scale search candidates +// are 4 1-away neighbors, and sad_list is non-null +// TODO(debargha): Merge this function with the one above. Also remove +// use_mvcost option since it is always 1, to save unnecessary branches. +static int vp9_pattern_search_sad(const MACROBLOCK *x, + MV *ref_mv, + int search_param, + int sad_per_bit, + int do_init_search, + int *sad_list, + const vp9_variance_fn_ptr_t *vfp, + int use_mvcost, + const MV *center_mv, + MV *best_mv, + const int num_candidates[MAX_PATTERN_SCALES], + const MV candidates[MAX_PATTERN_SCALES] + [MAX_PATTERN_CANDIDATES]) { + const MACROBLOCKD *const xd = &x->e_mbd; + static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = { + 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + }; + int i, s, t; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; + int br, bc; + int bestsad = INT_MAX; + int thissad; + int k = -1; + const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; + int best_init_s = search_param_to_steps[search_param]; + // adjust ref_mv to make sure it is within MV range + clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); + br = ref_mv->row; + bc = ref_mv->col; + if (sad_list != NULL) { + sad_list[0] = sad_list[1] = sad_list[2] = sad_list[3] = sad_list[4] = + INT_MAX; + } + + // Work out the start point for the search + bestsad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), in_what->stride) + + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); + + // Search all possible scales upto the search param around the center point + // pick the scale of the point that is best as the starting scale of + // further steps around it. + if (do_init_search) { + s = best_init_s; + best_init_s = -1; + for (t = 0; t <= s; ++t) { + int best_site = -1; + if (check_bounds(x, br, bc, 1 << t)) { + for (i = 0; i < num_candidates[t]; i++) { + const MV this_mv = {br + candidates[t][i].row, + bc + candidates[t][i].col}; + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride); + CHECK_BETTER + } + } else { + for (i = 0; i < num_candidates[t]; i++) { + const MV this_mv = {br + candidates[t][i].row, + bc + candidates[t][i].col}; + if (!is_mv_in(x, &this_mv)) + continue; + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride); + CHECK_BETTER + } + } + if (best_site == -1) { + continue; + } else { + best_init_s = t; + k = best_site; + } + } + if (best_init_s != -1) { + br += candidates[best_init_s][k].row; + bc += candidates[best_init_s][k].col; + } + } + + // If the center point is still the best, just skip this and move to + // the refinement step. + if (best_init_s != -1) { + int do_sad = (num_candidates[0] == 4 && sad_list != NULL); + int best_site = -1; + s = best_init_s; + + for (; s >= do_sad; s--) { + if (!do_init_search || s != best_init_s) { + if (check_bounds(x, br, bc, 1 << s)) { + for (i = 0; i < num_candidates[s]; i++) { + const MV this_mv = {br + candidates[s][i].row, + bc + candidates[s][i].col}; + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride); + CHECK_BETTER + } + } else { + for (i = 0; i < num_candidates[s]; i++) { + const MV this_mv = {br + candidates[s][i].row, + bc + candidates[s][i].col}; + if (!is_mv_in(x, &this_mv)) + continue; + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride); + CHECK_BETTER + } + } + + if (best_site == -1) { + continue; + } else { + br += candidates[s][best_site].row; + bc += candidates[s][best_site].col; + k = best_site; + } + } + + do { + int next_chkpts_indices[PATTERN_CANDIDATES_REF]; + best_site = -1; + next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; + next_chkpts_indices[1] = k; + next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; + + if (check_bounds(x, br, bc, 1 << s)) { + for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { + const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row, + bc + candidates[s][next_chkpts_indices[i]].col}; + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride); + CHECK_BETTER + } + } else { + for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { + const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row, + bc + candidates[s][next_chkpts_indices[i]].col}; + if (!is_mv_in(x, &this_mv)) + continue; + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride); + CHECK_BETTER + } + } + + if (best_site != -1) { + k = next_chkpts_indices[best_site]; + br += candidates[s][k].row; + bc += candidates[s][k].col; + } + } while (best_site != -1); + } + + // Note: If we enter the if below, then sad_list must be non-NULL. + if (s == 0) { + sad_list[0] = bestsad; + if (!do_init_search || s != best_init_s) { + if (check_bounds(x, br, bc, 1 << s)) { + for (i = 0; i < num_candidates[s]; i++) { + const MV this_mv = {br + candidates[s][i].row, + bc + candidates[s][i].col}; + sad_list[i + 1] = + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride); + CHECK_BETTER + } + } else { + for (i = 0; i < num_candidates[s]; i++) { + const MV this_mv = {br + candidates[s][i].row, + bc + candidates[s][i].col}; + if (!is_mv_in(x, &this_mv)) + continue; + sad_list[i + 1] = + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride); + CHECK_BETTER + } + } + + if (best_site != -1) { + br += candidates[s][best_site].row; + bc += candidates[s][best_site].col; + k = best_site; + } + } + while (best_site != -1) { + int next_chkpts_indices[PATTERN_CANDIDATES_REF]; + best_site = -1; + next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; + next_chkpts_indices[1] = k; + next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; + sad_list[1] = sad_list[2] = sad_list[3] = sad_list[4] = INT_MAX; + sad_list[((k + 2) % 4) + 1] = sad_list[0]; + sad_list[0] = bestsad; + + if (check_bounds(x, br, bc, 1 << s)) { + for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { + const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row, + bc + candidates[s][next_chkpts_indices[i]].col}; + sad_list[next_chkpts_indices[i] + 1] = + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride); + CHECK_BETTER + } + } else { + for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { + const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row, + bc + candidates[s][next_chkpts_indices[i]].col}; + if (!is_mv_in(x, &this_mv)) { + sad_list[next_chkpts_indices[i] + 1] = INT_MAX; + continue; + } + sad_list[next_chkpts_indices[i] + 1] = + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride); + CHECK_BETTER + } + } + + if (best_site != -1) { + k = next_chkpts_indices[best_site]; + br += candidates[s][k].row; + bc += candidates[s][k].col; + } + } + } + } + + // Returns the one-away integer pel sad values around the best as follows: + // sad_list[0]: sad at the best integer pel + // sad_list[1]: sad at delta {0, -1} (left) from the best integer pel + // sad_list[2]: sad at delta { 1, 0} (bottom) from the best integer pel + // sad_list[3]: sad at delta { 0, 1} (right) from the best integer pel + // sad_list[4]: sad at delta {-1, 0} (top) from the best integer pel + if (sad_list) { + static const MV neighbors[4] = {{0, -1}, {1, 0}, {0, 1}, {-1, 0}}; + if (sad_list[0] == INT_MAX) { + sad_list[0] = bestsad; + if (check_bounds(x, br, bc, 1)) { + for (i = 0; i < 4; i++) { + const MV this_mv = {br + neighbors[i].row, + bc + neighbors[i].col}; sad_list[i + 1] = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride); + } + } else { + for (i = 0; i < 4; i++) { + const MV this_mv = {br + neighbors[i].row, + bc + neighbors[i].col}; + if (!is_mv_in(x, &this_mv)) + sad_list[i + 1] = INT_MAX; + else + sad_list[i + 1] = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride); + } + } + } else { + if (use_mvcost) { + for (i = 0; i < 4; i++) { + const MV this_mv = {br + neighbors[i].row, + bc + neighbors[i].col}; + if (sad_list[i + 1] != INT_MAX) { + sad_list[i + 1] += + mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); + } + } } } } @@ -784,10 +1080,10 @@ int vp9_bigdia_search(const MACROBLOCK *x, {{-512, -512}, {0, -1024}, {512, -512}, {1024, 0}, {512, 512}, {0, 1024}, {-512, 512}, {-1024, 0}}, }; - return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, - do_init_search, sad_list, vfp, use_mvcost, - center_mv, best_mv, - bigdia_num_candidates, bigdia_candidates); + return vp9_pattern_search_sad(x, ref_mv, search_param, sad_per_bit, + do_init_search, sad_list, vfp, use_mvcost, + center_mv, best_mv, + bigdia_num_candidates, bigdia_candidates); } int vp9_square_search(const MACROBLOCK *x, diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 04f3cc20f..0dcbb0801 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -160,6 +160,8 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, sf->use_square_partition_only = 1; sf->tx_size_search_method = USE_LARGESTALL; sf->disable_split_mask = DISABLE_ALL_SPLIT; + sf->mv.search_method = BIGDIA; + sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED; sf->adaptive_rd_thresh = 4; sf->mode_search_skip_flags |= FLAG_SKIP_COMP_REFMISMATCH | FLAG_EARLY_TERMINATE;