Turn on global rotation-zoom warping prediction

With this patch, the ZEROMV mode is overloaded to represent
a single global dominant motion using one of three models:
1. True zero translation motion (as before)
2. A translation motion different from 0
3. A Rotation-zoom affine model where the predictor is warped
The actual model used is indicated at the frame level for
each reference frame.
A metric that computes the ratio of the error with a global
non-zero model to the error for zero motion, is used to
determine on the encoder side whether to use one of the two
non-zero models or not.

Change-Id: I1f3d235b8860e543191237024a89041ff3aad689
This commit is contained in:
Debargha Mukherjee 2015-05-18 16:23:24 -07:00
parent c4f04808eb
commit 5930a90cc9
10 changed files with 265 additions and 55 deletions

View File

@ -129,8 +129,7 @@ const vp9_tree_index vp9_global_motion_types_tree
static const vp9_prob default_global_motion_types_prob
[GLOBAL_MOTION_TYPES - 1] = {
// Currently only translation is used, so make the second prob very high.
240, 255
224, 128
};
#endif // CONFIG_GLOBAL_MOTION

View File

@ -163,9 +163,67 @@ static void WarpImage(TransformationType type, double *H,
}
}
static void convert_params_to_rotzoom(Global_Motion_Params *model,
double *H) {
double z = 1.0 + (double) model->zoom / (1 << ZOOM_PRECISION_BITS);
// Computes the ratio of the warp error to the zero motion error
double vp9_warp_erroradv_unq(TransformationType type, double *H,
unsigned char *ref,
int width, int height, int stride,
unsigned char *src,
int p_col, int p_row,
int p_width, int p_height, int p_stride,
int subsampling_col, int subsampling_row,
int x_scale, int y_scale) {
double H_z_translation[] = {0, 0};
double H_z_rotzoom[] = {1, 0, 0, 0};
double H_z_affine[] = {1, 0, 0, 1, 0, 0};
double H_z_homography[] = {1, 0, 0, 0, 1, 0, 0, 0, 1};
double *H_z = H_z_rotzoom;
int i, j;
int64_t sumerr = 0;
int64_t sumerr_z = 0;
projectPointsType projectPoints = get_projectPointsType(type);
if (type == TRANSLATION)
H_z = H_z_translation;
else if (type == ROTZOOM)
H_z = H_z_rotzoom;
else if (type == AFFINE)
H_z = H_z_affine;
else if (type == HOMOGRAPHY)
H_z = H_z_homography;
else
assert(0 && "Unknown TransformationType");
if (projectPoints == NULL)
return -1;
for (i = p_row; i < p_row + p_height; ++i) {
for (j = p_col; j < p_col + p_width; ++j) {
double in[2], out[2], out_z[2];
uint8_t pred, pred_z;
int err, err_z;
in[0] = subsampling_col ? 2 * j + 0.5 : j;
in[1] = subsampling_row ? 2 * i + 0.5 : i;
projectPoints(H, in, out, 1, 2, 2);
out[0] = subsampling_col ? (out[0] - 0.5) / 2.0 : out[0];
out[1] = subsampling_row ? (out[1] - 0.5) / 2.0 : out[1];
out[0] *= x_scale / 16.0;
out[1] *= y_scale / 16.0;
pred = bilinear(ref, out[0], out[1], width, height, stride);
err = pred - src[(j - p_col) + (i - p_row) * p_stride];
sumerr += err * err;
projectPoints(H_z, in, out_z, 1, 2, 2);
out_z[0] = subsampling_col ? (out_z[0] - 0.5) / 2.0 : out_z[0];
out_z[1] = subsampling_row ? (out_z[1] - 0.5) / 2.0 : out_z[1];
out_z[0] *= x_scale / 16.0;
out_z[1] *= y_scale / 16.0;
pred_z = bilinear(ref, out_z[0], out_z[1], width, height, stride);
err_z = pred_z - src[(j - p_col) + (i - p_row) * p_stride];
sumerr_z += err_z * err_z;
}
}
return (double)sumerr / sumerr_z;
}
void vp9_convert_params_to_rotzoom(Global_Motion_Params *model,
double *H) {
double z = (double) model->zoom / (1 << ZOOM_PRECISION_BITS);
double r = (double) model->rotation / (1 << ROTATION_PRECISION_BITS);
H[0] = (1 + z) * cos(r * M_PI / 180.0);
H[1] = -(1 + z) * sin(r * M_PI / 180.0);
@ -182,7 +240,7 @@ void vp9_warp_plane(Global_Motion_Params *gm,
int subsampling_col, int subsampling_row,
int x_scale, int y_scale) {
double H[9];
convert_params_to_rotzoom(gm, H);
vp9_convert_params_to_rotzoom(gm, H);
WarpImage(ROTZOOM, H,
ref, width, height, stride,
pred, p_col, p_row, p_width, p_height, p_stride,
@ -190,11 +248,28 @@ void vp9_warp_plane(Global_Motion_Params *gm,
x_scale, y_scale);
}
double vp9_warp_erroradv(Global_Motion_Params *gm,
unsigned char *ref,
int width, int height, int stride,
unsigned char *src,
int p_col, int p_row,
int p_width, int p_height, int p_stride,
int subsampling_col, int subsampling_row,
int x_scale, int y_scale) {
double H[9];
vp9_convert_params_to_rotzoom(gm, H);
return vp9_warp_erroradv_unq(ROTZOOM, H,
ref, width, height, stride,
src, p_col, p_row, p_width, p_height, p_stride,
subsampling_col, subsampling_row,
x_scale, y_scale);
}
static int_mv vp9_get_global_mv(int col, int row, Global_Motion_Params *model) {
int_mv mv;
int_mv mv;
double H[4];
double x, y;
convert_params_to_rotzoom(model, H);
vp9_convert_params_to_rotzoom(model, H);
x = H[0] * col + H[1] * row + H[2];
y = -H[1] * col + H[0] * row + H[3];
mv.as_mv.col = (int)floor(x * 8 + 0.5) - col;

View File

@ -64,6 +64,8 @@ void projectPointsTranslation(double *mat, double *points, double *proj,
const int n, const int stride_points,
const int stride_proj);
void vp9_convert_params_to_rotzoom(Global_Motion_Params *model, double *H);
void vp9_warp_plane(Global_Motion_Params *gm,
unsigned char *ref,
int width, int height, int stride,
@ -73,6 +75,23 @@ void vp9_warp_plane(Global_Motion_Params *gm,
int subsampling_col, int subsampling_row,
int x_scale, int y_scale);
double vp9_warp_erroradv(Global_Motion_Params *gm,
unsigned char *ref,
int width, int height, int stride,
unsigned char *src,
int p_col, int p_row,
int p_width, int p_height, int p_stride,
int subsampling_col, int subsampling_row,
int x_scale, int y_scale);
double vp9_warp_erroradv_unq(TransformationType type, double *H,
unsigned char *ref,
int width, int height, int stride,
unsigned char *src,
int p_col, int p_row,
int p_width, int p_height, int p_stride,
int subsampling_col, int subsampling_row,
int x_scale, int y_scale);
int_mv vp9_get_global_sb_center_mv(int col, int row, int bw, int bh,
Global_Motion_Params *model);
int_mv vp9_get_global_sub8x8_center_mv(int col, int row, int block,

View File

@ -51,11 +51,11 @@ static INLINE void clamp_mv(MV *mv, int min_col, int max_col,
#if CONFIG_GLOBAL_MOTION
#define MAX_GLOBAL_MOTION_MODELS 1
#define ZOOM_PRECISION_BITS 6
#define ROTATION_PRECISION_BITS 4
#define ZOOM_PRECISION_BITS 11
#define ROTATION_PRECISION_BITS 8
#define ABS_ZOOM_BITS 3
#define ABS_ROTATION_BITS 4
#define ABS_ZOOM_BITS 6
#define ABS_ROTATION_BITS 6
#define ABS_TRANSLATION_BITS 7
typedef enum {
@ -73,7 +73,7 @@ typedef struct {
int_mv mv;
} Global_Motion_Params;
static INLINE GLOBAL_MOTION_TYPE get_gmtype(Global_Motion_Params *gm) {
static INLINE GLOBAL_MOTION_TYPE get_gmtype(const Global_Motion_Params *gm) {
if (gm->rotation == 0 && gm->zoom == 0) {
return (gm->mv.as_int == 0 ? GLOBAL_ZERO : GLOBAL_TRANSLATION);
} else {

View File

@ -2623,6 +2623,14 @@ static void read_global_motion(VP9_COMMON *cm, vp9_reader *r) {
for (i = 0; i < cm->num_global_motion[frame]; ++i) {
read_global_motion_params(
cm->global_motion[frame], cm->fc.global_motion_types_prob, r);
/*
printf("Dec Ref %d [%d]: %d %d %d %d\n",
frame, cm->current_video_frame,
cm->global_motion[frame][i].zoom,
cm->global_motion[frame][i].rotation,
cm->global_motion[frame][i].mv.as_mv.col,
cm->global_motion[frame][i].mv.as_mv.row);
*/
}
}
}

View File

@ -1221,6 +1221,7 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm,
break;
}
mi->bmi[j].as_mode = b_mode;
mi->bmi[j].as_mv[0].as_int = mv_sub8x8[0].as_int;
if (is_compound)
mi->bmi[j].as_mv[1].as_int = mv_sub8x8[1].as_int;

View File

@ -2192,15 +2192,7 @@ static void write_uncompressed_header(VP9_COMP *cpi,
static void write_global_motion_params(Global_Motion_Params *params,
vp9_prob *probs,
vp9_writer *w) {
GLOBAL_MOTION_TYPE gmtype;
if (params->zoom == 0 && params->rotation == 0) {
if (params->mv.as_int == 0)
gmtype = GLOBAL_ZERO;
else
gmtype = GLOBAL_TRANSLATION;
} else {
gmtype = GLOBAL_ROTZOOM;
}
GLOBAL_MOTION_TYPE gmtype = get_gmtype(params);
vp9_write_token(w, vp9_global_motion_types_tree, probs,
&global_motion_types_encodings[gmtype]);
switch (gmtype) {
@ -2236,9 +2228,9 @@ static void write_global_motion(VP9_COMP *cpi, vp9_writer *w) {
MAX_GLOBAL_MOTION_MODELS * sizeof(*cm->global_motion[frame]));
}
write_global_motion_params(
cm->global_motion[frame], cm->fc.global_motion_types_prob, w);
&cm->global_motion[frame][0], cm->fc.global_motion_types_prob, w);
/*
printf("Ref %d [%d] (used %d): %d %d %d %d\n",
printf("Enc Ref %d [%d] (used %d): %d %d %d %d\n",
frame, cm->current_video_frame, cpi->global_motion_used[frame],
cm->global_motion[frame][i].zoom,
cm->global_motion[frame][i].rotation,

View File

@ -4591,7 +4591,13 @@ static int input_fpmb_stats(FIRSTPASS_MB_STATS *firstpass_mb_stats,
#endif
#if CONFIG_GLOBAL_MOTION
#define MIN_TRANSLATION_THRESH 16
#define MIN_TRANSLATION_THRESH 8
#define GLOBAL_MOTION_MODEL ROTZOOM
#define GLOBAL_MOTION_ADVANTAGE_THRESH_RZ 0.60
#define GLOBAL_MOTION_ADVANTAGE_THRESH_TR 0.75
// #define USE_BLOCK_BASED_GLOBAL_MOTION_COMPUTATION
static void convert_translation_to_params(
double *H, Global_Motion_Params *model) {
model->mv.as_mv.col = (int) floor(H[0] * 8 + 0.5);
@ -4599,13 +4605,14 @@ static void convert_translation_to_params(
if (abs(model->mv.as_mv.col) < MIN_TRANSLATION_THRESH &&
abs(model->mv.as_mv.row) < MIN_TRANSLATION_THRESH) {
model->mv.as_int = 0;
} else {
model->mv.as_mv.col =
clamp(model->mv.as_mv.col,
-(1 << ABS_TRANSLATION_BITS), (1 << ABS_TRANSLATION_BITS));
model->mv.as_mv.row =
clamp(model->mv.as_mv.row,
-(1 << ABS_TRANSLATION_BITS), (1 << ABS_TRANSLATION_BITS));
}
model->mv.as_mv.col =
clamp(model->mv.as_mv.col,
-(1 << ABS_TRANSLATION_BITS), (1 << ABS_TRANSLATION_BITS));
model->mv.as_mv.row =
clamp(model->mv.as_mv.row,
-(1 << ABS_TRANSLATION_BITS), (1 << ABS_TRANSLATION_BITS));
}
static void convert_rotzoom_to_params(double *H, Global_Motion_Params *model) {
@ -4620,7 +4627,21 @@ static void convert_rotzoom_to_params(double *H, Global_Motion_Params *model) {
model->rotation = clamp(
model->rotation, -(1 << ABS_ROTATION_BITS), (1 << ABS_ROTATION_BITS));
convert_translation_to_params(H + 2, model);
model->mv.as_mv.col = (int) floor(H[2] * 8 + 0.5);
model->mv.as_mv.row = (int) floor(H[3] * 8 + 0.5);
model->mv.as_mv.col =
clamp(model->mv.as_mv.col,
-(1 << ABS_TRANSLATION_BITS), (1 << ABS_TRANSLATION_BITS));
model->mv.as_mv.row =
clamp(model->mv.as_mv.row,
-(1 << ABS_TRANSLATION_BITS), (1 << ABS_TRANSLATION_BITS));
if (model->zoom == 0 && model->rotation == 0) {
if (abs(model->mv.as_mv.col) < MIN_TRANSLATION_THRESH &&
abs(model->mv.as_mv.row) < MIN_TRANSLATION_THRESH) {
model->mv.as_int = 0;
}
}
}
static void convert_model_to_params(double *H, TransformationType type,
@ -4664,8 +4685,6 @@ static void encode_frame_internal(VP9_COMP *cpi) {
cm->tx_mode = select_tx_mode(cpi);
#if CONFIG_GLOBAL_MOTION
#define GLOBAL_MOTION_MODEL TRANSLATION
// #define USE_BLOCK_BASED_GLOBAL_MOTION_COMPUTATION
vp9_clear_system_state();
vp9_zero(cpi->global_motion_used);
vpx_memset(cm->num_global_motion, 0, sizeof(cm->num_global_motion));
@ -4688,25 +4707,63 @@ static void encode_frame_internal(VP9_COMP *cpi) {
vp9_compute_global_motion_multiple_feature_based(
cpi, GLOBAL_MOTION_MODEL, cpi->Source, ref_buf,
MAX_GLOBAL_MOTION_MODELS, 0.5, global_motion))) {
#endif
#endif // USE_BLOCK_BASED_GLOBAL_MOTION_COMPUTATION
int i;
for (i = 0; i < num; i++) {
/*
printf("Ref %d [%d]: %f %f\n",
frame, cm->current_video_frame,
global_motion[i * get_numparams(GLOBAL_MOTION_MODEL)],
global_motion[i * get_numparams(GLOBAL_MOTION_MODEL) + 1]);
*/
convert_model_to_params(
global_motion + i * get_numparams(GLOBAL_MOTION_MODEL),
GLOBAL_MOTION_MODEL,
&cm->global_motion[frame][i]);
printf("Ref %d [%d]: %d %d %d %d\n",
frame, cm->current_video_frame,
cm->global_motion[frame][i].zoom,
cm->global_motion[frame][i].rotation,
cm->global_motion[frame][i].mv.as_mv.col,
cm->global_motion[frame][i].mv.as_mv.row);
if (get_gmtype(&cm->global_motion[frame][i]) != GLOBAL_ZERO) {
double erroradvantage_trans;
double erroradvantage =
vp9_warp_erroradv(&cm->global_motion[frame][i],
ref_buf->y_buffer,
ref_buf->y_crop_width,
ref_buf->y_crop_height,
ref_buf->y_stride,
cpi->Source->y_buffer,
0, 0,
cpi->Source->y_crop_width,
cpi->Source->y_crop_height,
cpi->Source->y_stride,
0, 0, 16, 16);
if (get_gmtype(&cm->global_motion[frame][i]) == GLOBAL_ROTZOOM) {
Global_Motion_Params gm = cm->global_motion[frame][i];
gm.rotation = 0;
gm.zoom = 0;
gm.gmtype = GLOBAL_TRANSLATION;
erroradvantage_trans =
vp9_warp_erroradv(&gm,
ref_buf->y_buffer,
ref_buf->y_crop_width,
ref_buf->y_crop_height,
ref_buf->y_stride,
cpi->Source->y_buffer,
0, 0,
cpi->Source->y_crop_width,
cpi->Source->y_crop_height,
cpi->Source->y_stride,
0, 0, 16, 16);
} else {
erroradvantage_trans = erroradvantage;
erroradvantage = 10;
}
if (erroradvantage > GLOBAL_MOTION_ADVANTAGE_THRESH_RZ) {
if (erroradvantage_trans > GLOBAL_MOTION_ADVANTAGE_THRESH_TR) {
// Not enough advantage in using a global model. Make 0.
vpx_memset(&cm->global_motion[frame][i], 0,
sizeof(cm->global_motion[frame][i]));
} else {
if (cm->global_motion[frame][i].gmtype == GLOBAL_ROTZOOM) {
cm->global_motion[frame][i].rotation = 0;
cm->global_motion[frame][i].zoom = 0;
cm->global_motion[frame][i].gmtype =
get_gmtype(&cm->global_motion[frame][i]);
}
}
}
}
}
cm->num_global_motion[frame] = num;
}

View File

@ -183,9 +183,17 @@ int vp9_compute_global_motion_single_feature_based(
inlier_map);
#ifdef VERBOSE
printf("Inliers = %d\n", num_inliers);
printf("Error Score (inliers) = %g\n",
compute_error_score(type, correspondences, 4, correspondences + 2, 4,
num_correspondences, H, inlier_map));
if (num_inliers) {
printf("Error Score (inliers) = %g\n",
compute_error_score(type, correspondences, 4, correspondences + 2, 4,
num_correspondences, H, inlier_map));
printf("Warp error score = %g\n",
vp9_warp_error_unq(ROTZOOM, H, ref->y_buffer, ref->y_crop_width,
ref->y_crop_height, ref->y_stride,
frm->y_buffer, 0, 0,
frm->y_crop_width, frm->y_crop_height,
frm->y_stride, 0, 0, 16, 16));
}
#endif
free(correspondences);
free(inlier_map);
@ -322,6 +330,12 @@ int vp9_compute_global_motion_multiple_feature_based(
printf("Error Score (inliers) = %g\n",
compute_error_score(type, correspondences, 4, correspondences + 2, 4,
num_correspondences, H, inlier_map));
printf("Warp error score = %g\n",
vp9_warp_error_unq(ROTZOOM, H, ref->y_buffer, ref->y_crop_width,
ref->y_crop_height, ref->y_stride,
frm->y_buffer, 0, 0,
frm->y_crop_width, frm->y_crop_height,
frm->y_stride, 0, 0, 16, 16));
#endif
(void) num_inliers;
free(correspondences);

View File

@ -2741,6 +2741,27 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
int_mv single_newmv[MAX_REF_FRAMES],
int *rate_mv);
#if CONFIG_GLOBAL_MOTION
static int get_gmbitcost(const Global_Motion_Params *gm,
const vp9_prob *probs) {
int gmtype_cost[GLOBAL_MOTION_TYPES];
int bits;
vp9_cost_tokens(gmtype_cost, probs, vp9_global_motion_types_tree);
if (gm->rotation == 0 && gm->zoom == 0) {
bits = (gm->mv.as_int == 0 ? 0 : (ABS_TRANSLATION_BITS + 1) * 2);
} else {
bits = (ABS_TRANSLATION_BITS + 1) * 2 +
ABS_ZOOM_BITS + ABS_ROTATION_BITS + 2;
}
return bits * 256 + gmtype_cost[gm->gmtype];
}
#define GLOBAL_MOTION_RATE(ref) \
(cpi->global_motion_used[ref] >= 2 ? 0 : \
get_gmbitcost(&cm->global_motion[(ref)][0], \
cm->fc.global_motion_types_prob) / 2);
#endif // CONFIG_GLOBAL_MOTION
static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCKD *xd, int i,
PREDICTION_MODE mode, int_mv this_mv[2],
int_mv frame_mv[MAX_REF_FRAMES],
@ -2750,6 +2771,9 @@ static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCKD *xd, int i,
#endif // CONFIG_NEW_INTER
int_mv *ref_mv[2],
const int *mvjcost, int *mvcost[2]) {
#if CONFIG_GLOBAL_MOTION
const VP9_COMMON *cm = &cpi->common;
#endif // CONFIG_GLOBAL_MOTION
MODE_INFO *const mic = xd->mi[0].src_mi;
const MB_MODE_INFO *const mbmi = &mic->mbmi;
int thismvcost = 0;
@ -2793,10 +2817,15 @@ static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCKD *xd, int i,
#if CONFIG_GLOBAL_MOTION
this_mv[0].as_int =
cpi->common.global_motion[mbmi->ref_frame[0]][0].mv.as_int;
thismvcost +=
GLOBAL_MOTION_RATE(mbmi->ref_frame[0]);
#if !CONFIG_NEW_INTER
if (is_compound)
if (is_compound) {
this_mv[1].as_int =
cpi->common.global_motion[mbmi->ref_frame[1]][0].mv.as_int;
thismvcost +=
GLOBAL_MOTION_RATE(mbmi->ref_frame[1]);
}
#endif // !CONFIG_NEW_INTER
#else // CONFIG_GLOBAL_MOTION
this_mv[0].as_int = 0;
@ -2851,6 +2880,9 @@ static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCKD *xd, int i,
cpi->common.global_motion[mbmi->ref_frame[0]][0].mv.as_int;
this_mv[1].as_int =
cpi->common.global_motion[mbmi->ref_frame[1]][0].mv.as_int;
thismvcost +=
GLOBAL_MOTION_RATE(mbmi->ref_frame[0]) +
GLOBAL_MOTION_RATE(mbmi->ref_frame[1]);
#else
this_mv[0].as_int = 0;
this_mv[1].as_int = 0;
@ -3423,8 +3455,9 @@ static int64_t rd_pick_best_sub8x8_mode(
#if CONFIG_GLOBAL_MOTION
if (get_gmtype(&cm->global_motion[mbmi->ref_frame[0]][0]) ==
GLOBAL_ZERO &&
get_gmtype(&cm->global_motion[mbmi->ref_frame[1]][0]) ==
GLOBAL_ZERO)
(!has_second_rf ||
get_gmtype(&cm->global_motion[mbmi->ref_frame[1]][0]) ==
GLOBAL_ZERO))
#endif
if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
this_mode, mbmi->ref_frame))
@ -5377,6 +5410,17 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
*distortion = skip_sse_sb;
}
#if CONFIG_GLOBAL_MOTION
if (this_mode == ZEROMV
#if CONFIG_NEW_INTER
|| this_mode == ZERO_ZEROMV
#endif
) {
*rate2 += GLOBAL_MOTION_RATE(mbmi->ref_frame[0]);
if (is_comp_pred)
*rate2 += GLOBAL_MOTION_RATE(mbmi->ref_frame[1]);
}
#endif // CONFIG_GLOBAL_MOTION
if (!is_comp_pred)
single_skippable[this_mode][refs[0]] = *skippable;
@ -6271,8 +6315,9 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
#if CONFIG_GLOBAL_MOTION
} else if (get_gmtype(&cm->global_motion[ref_frame][0]) ==
GLOBAL_ZERO &&
get_gmtype(&cm->global_motion[second_ref_frame][0]) ==
GLOBAL_ZERO) {
(!comp_pred ||
get_gmtype(&cm->global_motion[second_ref_frame][0]) ==
GLOBAL_ZERO)) {
#else
} else {
#endif // CONFIG_GLOBAL_MOTION