Adds two new subpel search methods
One is a more aggressive version of the pruned subpel tree search where only a single halfpel candidate is searched. The search candidate is based on a surface fit result. The other is a method to obtain the subpel position at one shot based on the same surface fit. The methods have not been deployed in any speed setting yet. Change-Id: I34fef3f2e34f11396c9d1ba97f4be8c4ffca62d3
This commit is contained in:
parent
9ed23de13f
commit
4e9c0d2ad4
@ -286,6 +286,190 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
|
||||
bestmv->row *= 8; \
|
||||
bestmv->col *= 8;
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
#define SETUP_CENTER_ERROR \
|
||||
if (second_pred != NULL) { \
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { \
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, comp_pred16, 64 * 64); \
|
||||
vp9_high_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset, \
|
||||
y_stride); \
|
||||
besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, z, src_stride, \
|
||||
sse1); \
|
||||
} else { \
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); \
|
||||
vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); \
|
||||
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); \
|
||||
} \
|
||||
} else { \
|
||||
besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1); \
|
||||
} \
|
||||
*distortion = besterr; \
|
||||
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
|
||||
|
||||
#else
|
||||
|
||||
#define SETUP_CENTER_ERROR \
|
||||
if (second_pred != NULL) { \
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); \
|
||||
vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); \
|
||||
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); \
|
||||
} else { \
|
||||
besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1); \
|
||||
} \
|
||||
*distortion = besterr; \
|
||||
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
|
||||
|
||||
|
||||
static INLINE int divide_and_round(const int n, const int d) {
|
||||
return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d);
|
||||
}
|
||||
|
||||
static INLINE int is_sad_list_wellbehaved(int *sad_list) {
|
||||
return sad_list[0] >= sad_list[1] &&
|
||||
sad_list[0] >= sad_list[2] &&
|
||||
sad_list[0] >= sad_list[3] &&
|
||||
sad_list[0] >= sad_list[4];
|
||||
}
|
||||
|
||||
// Returns surface minima estimate at given precision in 1/2^n bits.
|
||||
// Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C
|
||||
// For a given set of costs S0, S1, S2, S3, S4 at points
|
||||
// (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively,
|
||||
// the solution for the location of the minima (x0, y0) is given by:
|
||||
// x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0),
|
||||
// y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0).
|
||||
// The code below is an integerized version of that.
|
||||
static void get_cost_surf_min(int *sad_list, int *ir, int *ic,
|
||||
int bits) {
|
||||
*ic = divide_and_round((sad_list[1] - sad_list[3]) * (1 << (bits - 1)),
|
||||
(sad_list[1] - 2 * sad_list[0] + sad_list[3]));
|
||||
*ir = divide_and_round((sad_list[4] - sad_list[2]) * (1 << (bits - 1)),
|
||||
(sad_list[4] - 2 * sad_list[0] + sad_list[2]));
|
||||
}
|
||||
|
||||
int vp9_find_best_sub_pixel_surface_fit(const MACROBLOCK *x,
|
||||
MV *bestmv, const MV *ref_mv,
|
||||
int allow_hp,
|
||||
int error_per_bit,
|
||||
const vp9_variance_fn_ptr_t *vfp,
|
||||
int forced_stop,
|
||||
int iters_per_step,
|
||||
int *sad_list,
|
||||
int *mvjcost, int *mvcost[2],
|
||||
int *distortion,
|
||||
unsigned int *sse1,
|
||||
const uint8_t *second_pred,
|
||||
int w, int h) {
|
||||
SETUP_SUBPEL_SEARCH;
|
||||
SETUP_CENTER_ERROR;
|
||||
(void) halfiters;
|
||||
(void) quarteriters;
|
||||
(void) eighthiters;
|
||||
(void) whichdir;
|
||||
(void) allow_hp;
|
||||
(void) forced_stop;
|
||||
(void) hstep;
|
||||
|
||||
if (sad_list &&
|
||||
sad_list[0] != INT_MAX && sad_list[1] != INT_MAX &&
|
||||
sad_list[2] != INT_MAX && sad_list[3] != INT_MAX &&
|
||||
sad_list[4] != INT_MAX &&
|
||||
is_sad_list_wellbehaved(sad_list)) {
|
||||
int ir, ic;
|
||||
unsigned int minpt;
|
||||
get_cost_surf_min(sad_list, &ir, &ic, 3);
|
||||
if (ir != 0 || ic != 0) {
|
||||
CHECK_BETTER(minpt, tr + ir, tc + ic);
|
||||
}
|
||||
}
|
||||
|
||||
bestmv->row = br;
|
||||
bestmv->col = bc;
|
||||
|
||||
if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
|
||||
(abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
|
||||
return INT_MAX;
|
||||
|
||||
return besterr;
|
||||
}
|
||||
|
||||
int vp9_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x,
|
||||
MV *bestmv, const MV *ref_mv,
|
||||
int allow_hp,
|
||||
int error_per_bit,
|
||||
const vp9_variance_fn_ptr_t *vfp,
|
||||
int forced_stop,
|
||||
int iters_per_step,
|
||||
int *sad_list,
|
||||
int *mvjcost, int *mvcost[2],
|
||||
int *distortion,
|
||||
unsigned int *sse1,
|
||||
const uint8_t *second_pred,
|
||||
int w, int h) {
|
||||
SETUP_SUBPEL_SEARCH;
|
||||
SETUP_CENTER_ERROR;
|
||||
if (sad_list &&
|
||||
sad_list[0] != INT_MAX && sad_list[1] != INT_MAX &&
|
||||
sad_list[2] != INT_MAX && sad_list[3] != INT_MAX &&
|
||||
sad_list[4] != INT_MAX &&
|
||||
is_sad_list_wellbehaved(sad_list)) {
|
||||
unsigned int minpt;
|
||||
int ir, ic;
|
||||
get_cost_surf_min(sad_list, &ir, &ic, 1);
|
||||
if (ir != 0 || ic != 0) {
|
||||
CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep);
|
||||
}
|
||||
} else {
|
||||
FIRST_LEVEL_CHECKS;
|
||||
if (halfiters > 1) {
|
||||
SECOND_LEVEL_CHECKS;
|
||||
}
|
||||
}
|
||||
|
||||
tr = br;
|
||||
tc = bc;
|
||||
|
||||
// Each subsequent iteration checks at least one point in common with
|
||||
// the last iteration could be 2 ( if diag selected) 1/4 pel
|
||||
|
||||
// Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
|
||||
if (forced_stop != 2) {
|
||||
hstep >>= 1;
|
||||
FIRST_LEVEL_CHECKS;
|
||||
if (quarteriters > 1) {
|
||||
SECOND_LEVEL_CHECKS;
|
||||
}
|
||||
tr = br;
|
||||
tc = bc;
|
||||
}
|
||||
|
||||
if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
|
||||
hstep >>= 1;
|
||||
FIRST_LEVEL_CHECKS;
|
||||
if (eighthiters > 1) {
|
||||
SECOND_LEVEL_CHECKS;
|
||||
}
|
||||
tr = br;
|
||||
tc = bc;
|
||||
}
|
||||
// These lines insure static analysis doesn't warn that
|
||||
// tr and tc aren't used after the above point.
|
||||
(void) tr;
|
||||
(void) tc;
|
||||
|
||||
bestmv->row = br;
|
||||
bestmv->col = bc;
|
||||
|
||||
if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
|
||||
(abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
|
||||
return INT_MAX;
|
||||
|
||||
return besterr;
|
||||
}
|
||||
|
||||
int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
|
||||
MV *bestmv, const MV *ref_mv,
|
||||
int allow_hp,
|
||||
@ -300,30 +484,7 @@ int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
|
||||
const uint8_t *second_pred,
|
||||
int w, int h) {
|
||||
SETUP_SUBPEL_SEARCH;
|
||||
if (second_pred != NULL) {
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, comp_pred16, 64 * 64);
|
||||
vp9_high_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
|
||||
y_stride);
|
||||
besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, z, src_stride,
|
||||
sse1);
|
||||
} else {
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
|
||||
vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
|
||||
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
|
||||
}
|
||||
#else
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
|
||||
vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
|
||||
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
} else {
|
||||
besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1);
|
||||
}
|
||||
*distortion = besterr;
|
||||
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
|
||||
|
||||
SETUP_CENTER_ERROR;
|
||||
if (sad_list &&
|
||||
sad_list[0] != INT_MAX && sad_list[1] != INT_MAX &&
|
||||
sad_list[2] != INT_MAX && sad_list[3] != INT_MAX &&
|
||||
@ -415,29 +576,7 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
|
||||
const uint8_t *second_pred,
|
||||
int w, int h) {
|
||||
SETUP_SUBPEL_SEARCH;
|
||||
if (second_pred != NULL) {
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, comp_pred16, 64 * 64);
|
||||
vp9_high_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
|
||||
y_stride);
|
||||
besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, z, src_stride,
|
||||
sse1);
|
||||
} else {
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
|
||||
vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
|
||||
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
|
||||
}
|
||||
#else
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
|
||||
vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
|
||||
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
} else {
|
||||
besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1);
|
||||
}
|
||||
*distortion = besterr;
|
||||
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
|
||||
SETUP_CENTER_ERROR;
|
||||
(void) sad_list; // to silence compiler warning
|
||||
|
||||
// Each subsequent iteration checks at least one point in
|
||||
|
@ -108,6 +108,8 @@ typedef int (fractional_mv_step_fp) (
|
||||
|
||||
extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree;
|
||||
extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned;
|
||||
extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned_more;
|
||||
extern fractional_mv_step_fp vp9_find_best_sub_pixel_surface_fit;
|
||||
|
||||
typedef int (*vp9_full_search_fn_t)(const MACROBLOCK *x,
|
||||
const MV *ref_mv, int sad_per_bit,
|
||||
|
@ -421,6 +421,10 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
|
||||
cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree;
|
||||
} else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED) {
|
||||
cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned;
|
||||
} else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_MORE) {
|
||||
cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned_more;
|
||||
} else if (sf->mv.subpel_search_method == SUBPEL_SURFACE_FIT) {
|
||||
cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_surface_fit;
|
||||
}
|
||||
|
||||
cpi->mb.optimize = sf->optimize_coefficients == 1 && oxcf->pass != 1;
|
||||
|
@ -79,6 +79,8 @@ typedef enum {
|
||||
typedef enum {
|
||||
SUBPEL_TREE = 0,
|
||||
SUBPEL_TREE_PRUNED = 1,
|
||||
SUBPEL_TREE_PRUNED_MORE = 2,
|
||||
SUBPEL_SURFACE_FIT = 3,
|
||||
// Other methods to come
|
||||
} SUBPEL_SEARCH_METHODS;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user