Adds two new subpel search methods

One is a more aggressive version of the pruned subpel tree
search where only a single halfpel candidate is searched.
The search candidate is based on a surface fit result.
The other is a method that obtains the subpel position in a single
shot, based on the same surface fit.

The methods have not been deployed in any speed setting yet.

Change-Id: I34fef3f2e34f11396c9d1ba97f4be8c4ffca62d3
This commit is contained in:
Deb Mukherjee 2014-09-24 13:25:34 -07:00
parent 9ed23de13f
commit 4e9c0d2ad4
4 changed files with 194 additions and 47 deletions

View File

@ -286,6 +286,190 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
bestmv->row *= 8; \
bestmv->col *= 8;
#if CONFIG_VP9_HIGHBITDEPTH
// SETUP_CENTER_ERROR computes the error of the starting (center) position
// into 'besterr': the raw prediction error from vfp->vf is stored to
// *distortion (and its SSE to *sse1), then the motion-vector rate cost of
// 'bestmv' relative to 'ref_mv' is added.  When 'second_pred' is non-NULL
// (compound prediction) the two predictions are first combined via
// vp9_(high_)comp_avg_pred into a 64x64-max scratch buffer.  This variant
// dispatches on the frame buffer's high-bitdepth flag.
#define SETUP_CENTER_ERROR \
if (second_pred != NULL) { \
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { \
DECLARE_ALIGNED_ARRAY(16, uint16_t, comp_pred16, 64 * 64); \
vp9_high_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset, \
y_stride); \
besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, z, src_stride, \
sse1); \
} else { \
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); \
vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); \
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); \
} \
} else { \
besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1); \
} \
*distortion = besterr; \
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
#else
// Low-bitdepth-only build: same computation without the bit-depth dispatch.
#define SETUP_CENTER_ERROR \
if (second_pred != NULL) { \
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); \
vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); \
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); \
} else { \
besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1); \
} \
*distortion = besterr; \
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
#endif  // CONFIG_VP9_HIGHBITDEPTH
// Integer division of n by d with the quotient rounded to the nearest
// integer; ties round away from zero.
// NOTE(review): d == 0 divides by zero (undefined behavior) -- callers
// must guarantee a non-zero divisor.
static INLINE int divide_and_round(const int n, const int d) {
const int bias = d / 2;
// When the operands differ in sign the true quotient is negative, so the
// half-divisor bias is subtracted rather than added before truncation.
if ((n < 0) != (d < 0))
return (n - bias) / d;
return (n + bias) / d;
}
// Returns 1 when the 5-entry SAD list qualifies for the quadratic surface
// fit: the center entry (index 0) must be >= each of its four neighbors.
// NOTE(review): for a cost *minimum* at the center one would expect the
// center SAD to be the smallest; the >= comparisons here look inverted --
// confirm the intended well-behavedness condition.
static INLINE int is_sad_list_wellbehaved(int *sad_list) {
int i;
for (i = 1; i < 5; ++i) {
if (sad_list[0] < sad_list[i])
return 0;
}
return 1;
}
// Returns surface minima estimate at given precision in 1/2^n bits.
// Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C
// For a given set of costs S0, S1, S2, S3, S4 at points
// (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively,
// the solution for the location of the minima (x0, y0) is given by:
// x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0),
// y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0).
// The code below is an integerized version of that.
// Outputs: *ic (column) and *ir (row), in units of 1/2^bits pel.
static void get_cost_surf_min(int *sad_list, int *ir, int *ic,
int bits) {
// Second differences (discrete curvature) along each axis.
const int dc = sad_list[1] - 2 * sad_list[0] + sad_list[3];
const int dr = sad_list[4] - 2 * sad_list[0] + sad_list[2];
// A zero curvature means the fitted surface is flat along that axis;
// dividing by it in divide_and_round() would be undefined behavior
// (possible even when is_sad_list_wellbehaved() passed, e.g. all SADs
// equal), so report no offset on that axis instead.
*ic = (dc == 0) ? 0
: divide_and_round((sad_list[1] - sad_list[3]) * (1 << (bits - 1)), dc);
*ir = (dr == 0) ? 0
: divide_and_round((sad_list[4] - sad_list[2]) * (1 << (bits - 1)), dr);
}
// One-shot sub-pel motion search: instead of iteratively probing half-,
// quarter- and eighth-pel candidates, fit a quadratic cost surface to the
// five full-pel SADs around the best full-pel position and evaluate only
// the single predicted minimum at 1/8-pel precision (bits == 3).  Falls
// back to the full-pel result when the SAD list is missing, contains
// INT_MAX entries, or is not well behaved.
// Returns the best error found, or INT_MAX when the resulting MV lands
// outside the legal full-pel range relative to ref_mv.
int vp9_find_best_sub_pixel_surface_fit(const MACROBLOCK *x,
MV *bestmv, const MV *ref_mv,
int allow_hp,
int error_per_bit,
const vp9_variance_fn_ptr_t *vfp,
int forced_stop,
int iters_per_step,
int *sad_list,
int *mvjcost, int *mvcost[2],
int *distortion,
unsigned int *sse1,
const uint8_t *second_pred,
int w, int h) {
SETUP_SUBPEL_SEARCH;
SETUP_CENTER_ERROR;
// The iterative-search controls declared by SETUP_SUBPEL_SEARCH are not
// used by this one-shot method; silence unused-variable warnings.
(void) halfiters;
(void) quarteriters;
(void) eighthiters;
(void) whichdir;
(void) allow_hp;
(void) forced_stop;
(void) hstep;
// Attempt the fit only when all five SADs are valid and the list is
// well behaved (per is_sad_list_wellbehaved()).
if (sad_list &&
sad_list[0] != INT_MAX && sad_list[1] != INT_MAX &&
sad_list[2] != INT_MAX && sad_list[3] != INT_MAX &&
sad_list[4] != INT_MAX &&
is_sad_list_wellbehaved(sad_list)) {
int ir, ic;
unsigned int minpt;
// (ir, ic) is the estimated minimum offset in 1/8-pel units (bits == 3),
// applied directly to the search center (tr, tc) from
// SETUP_SUBPEL_SEARCH -- presumably also in 1/8-pel units; confirm there.
get_cost_surf_min(sad_list, &ir, &ic, 3);
if (ir != 0 || ic != 0) {
CHECK_BETTER(minpt, tr + ir, tc + ic);
}
}
bestmv->row = br;
bestmv->col = bc;
// Reject motion vectors outside the legal full-pel range; the bound is
// expressed in 1/8-pel units (MAX_FULL_PEL_VAL << 3).
if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
(abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
return INT_MAX;
return besterr;
}
// More aggressive variant of the pruned subpel tree search: at the half-pel
// level only a single candidate -- the minimum predicted by a quadratic
// surface fit of the five full-pel SADs (bits == 1, i.e. half-pel units) --
// is evaluated.  If the SAD list is unavailable or not well behaved, the
// regular first/second level half-pel checks run instead.  Quarter- and
// eighth-pel refinement then proceed as in the normal tree search, gated
// by forced_stop and allow_hp.
int vp9_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x,
MV *bestmv, const MV *ref_mv,
int allow_hp,
int error_per_bit,
const vp9_variance_fn_ptr_t *vfp,
int forced_stop,
int iters_per_step,
int *sad_list,
int *mvjcost, int *mvcost[2],
int *distortion,
unsigned int *sse1,
const uint8_t *second_pred,
int w, int h) {
SETUP_SUBPEL_SEARCH;
SETUP_CENTER_ERROR;
if (sad_list &&
sad_list[0] != INT_MAX && sad_list[1] != INT_MAX &&
sad_list[2] != INT_MAX && sad_list[3] != INT_MAX &&
sad_list[4] != INT_MAX &&
is_sad_list_wellbehaved(sad_list)) {
unsigned int minpt;
int ir, ic;
// (ir, ic) is the predicted minimum in half-pel units; scale by hstep
// to step from the search center (tr, tc).
get_cost_surf_min(sad_list, &ir, &ic, 1);
if (ir != 0 || ic != 0) {
CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep);
}
} else {
FIRST_LEVEL_CHECKS;
if (halfiters > 1) {
SECOND_LEVEL_CHECKS;
}
}
tr = br;
tc = bc;
// Each subsequent iteration checks at least one point in common with the
// last iteration; it could be two points if the diagonal was selected.
// Note forced_stop: 0 - full, 1 - qtr only, 2 - half only.
// Quarter-pel level.
if (forced_stop != 2) {
hstep >>= 1;
FIRST_LEVEL_CHECKS;
if (quarteriters > 1) {
SECOND_LEVEL_CHECKS;
}
tr = br;
tc = bc;
}
// Eighth-pel level, only when high-precision MVs are allowed.
if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
hstep >>= 1;
FIRST_LEVEL_CHECKS;
if (eighthiters > 1) {
SECOND_LEVEL_CHECKS;
}
tr = br;
tc = bc;
}
// These lines ensure static analysis doesn't warn that
// tr and tc aren't used after the above point.
(void) tr;
(void) tc;
bestmv->row = br;
bestmv->col = bc;
// Reject motion vectors outside the legal full-pel range; the bound is
// expressed in 1/8-pel units (MAX_FULL_PEL_VAL << 3).
if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
(abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
return INT_MAX;
return besterr;
}
int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
MV *bestmv, const MV *ref_mv,
int allow_hp,
@ -300,30 +484,7 @@ int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
const uint8_t *second_pred,
int w, int h) {
SETUP_SUBPEL_SEARCH;
if (second_pred != NULL) {
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED_ARRAY(16, uint16_t, comp_pred16, 64 * 64);
vp9_high_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
y_stride);
besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, z, src_stride,
sse1);
} else {
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
}
#else
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
#endif // CONFIG_VP9_HIGHBITDEPTH
} else {
besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1);
}
*distortion = besterr;
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
SETUP_CENTER_ERROR;
if (sad_list &&
sad_list[0] != INT_MAX && sad_list[1] != INT_MAX &&
sad_list[2] != INT_MAX && sad_list[3] != INT_MAX &&
@ -415,29 +576,7 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
const uint8_t *second_pred,
int w, int h) {
SETUP_SUBPEL_SEARCH;
if (second_pred != NULL) {
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED_ARRAY(16, uint16_t, comp_pred16, 64 * 64);
vp9_high_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
y_stride);
besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, z, src_stride,
sse1);
} else {
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
}
#else
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
#endif // CONFIG_VP9_HIGHBITDEPTH
} else {
besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1);
}
*distortion = besterr;
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
SETUP_CENTER_ERROR;
(void) sad_list; // to silence compiler warning
// Each subsequent iteration checks at least one point in

View File

@ -108,6 +108,8 @@ typedef int (fractional_mv_step_fp) (
extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree;
extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned;
extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned_more;
extern fractional_mv_step_fp vp9_find_best_sub_pixel_surface_fit;
typedef int (*vp9_full_search_fn_t)(const MACROBLOCK *x,
const MV *ref_mv, int sad_per_bit,

View File

@ -421,6 +421,10 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree;
} else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED) {
cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned;
} else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_MORE) {
cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned_more;
} else if (sf->mv.subpel_search_method == SUBPEL_SURFACE_FIT) {
cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_surface_fit;
}
cpi->mb.optimize = sf->optimize_coefficients == 1 && oxcf->pass != 1;

View File

@ -79,6 +79,8 @@ typedef enum {
// Method used for the fractional (sub-pel) stage of motion search; the
// matching vp9_find_best_sub_pixel_* function is selected from this value
// in vp9_set_speed_features().
typedef enum {
SUBPEL_TREE = 0,
SUBPEL_TREE_PRUNED = 1,  // Pruned tree search.
SUBPEL_TREE_PRUNED_MORE = 2,  // Evaluates a single surface-fit half-pel candidate.
SUBPEL_SURFACE_FIT = 3,  // Obtains the sub-pel position in one shot from a surface fit.
// Other methods to come
} SUBPEL_SEARCH_METHODS;