Optical flow for computing global motion params
Addressing comments, fixing style issues Change-Id: I91b72ab5cdf80d68476858f442616ab3af41e709
This commit is contained in:
parent
9227a372d3
commit
a7aab42915
@ -18,7 +18,7 @@
|
||||
#include "vp9/common/vp9_mv.h"
|
||||
#include "vp9/common/vp9_motion_model.h"
|
||||
|
||||
INLINE projectPointsType get_projectPointsType(TransformationType type) {
|
||||
inline projectPointsType get_projectPointsType(TransformationType type) {
|
||||
switch (type) {
|
||||
case HOMOGRAPHY:
|
||||
return projectPointsHomography;
|
||||
@ -118,8 +118,8 @@ double bicubic(unsigned char *ref, double x, double y, int stride) {
|
||||
return getCubicValue(arr, x - i);
|
||||
}
|
||||
|
||||
static unsigned char interpolate(unsigned char *ref, double x, double y,
|
||||
int width, int height, int stride) {
|
||||
unsigned char interpolate(unsigned char *ref, double x, double y,
|
||||
int width, int height, int stride) {
|
||||
if (x < 0 && y < 0) return ref[0];
|
||||
else if (x < 0 && y > height - 1)
|
||||
return ref[(height - 1) * stride];
|
||||
@ -215,6 +215,34 @@ static void WarpImage(TransformationType type, double *H,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// computes warp error
|
||||
double compute_warp_and_error(TransformationType type,
|
||||
unsigned char *ref,
|
||||
unsigned char *frm,
|
||||
int width, int height, int stride,
|
||||
double *H) {
|
||||
int i, j;
|
||||
double warped;
|
||||
double mse = 0;
|
||||
double err = 0;
|
||||
projectPointsType projectPoints = get_projectPointsType(type);
|
||||
if (projectPoints == NULL) return -1.0;
|
||||
for (i = 0; i < height; ++i)
|
||||
for (j = 0; j < width; ++j) {
|
||||
double in[2], out[2];
|
||||
in[0] = j;
|
||||
in[1] = i;
|
||||
projectPoints(H, in, out, 1, 2, 2);
|
||||
warped = interpolate(ref, out[0], out[1], width, height, stride);
|
||||
err = warped - frm[j + i * stride];
|
||||
mse += err * err;
|
||||
}
|
||||
|
||||
mse /= (width * height);
|
||||
return mse;
|
||||
}
|
||||
|
||||
// Computes the ratio of the warp error to the zero motion error
|
||||
double vp9_warp_erroradv_unq(TransformationType type, double *H,
|
||||
unsigned char *ref,
|
||||
|
@ -49,8 +49,6 @@ static INLINE int get_numparams(TransformationType type) {
|
||||
}
|
||||
}
|
||||
|
||||
INLINE projectPointsType get_projectPointsType(TransformationType type);
|
||||
|
||||
void projectPointsHomography(double *mat, double *points, double *proj,
|
||||
const int n, const int stride_points,
|
||||
const int stride_proj);
|
||||
@ -64,6 +62,8 @@ void projectPointsTranslation(double *mat, double *points, double *proj,
|
||||
const int n, const int stride_points,
|
||||
const int stride_proj);
|
||||
|
||||
projectPointsType get_projectPointsType(TransformationType type);
|
||||
|
||||
void vp9_convert_params_to_rotzoom(Global_Motion_Params *model, double *H);
|
||||
|
||||
void vp9_warp_plane(Global_Motion_Params *gm,
|
||||
@ -92,6 +92,18 @@ double vp9_warp_erroradv_unq(TransformationType type, double *H,
|
||||
int subsampling_col, int subsampling_row,
|
||||
int x_scale, int y_scale);
|
||||
|
||||
double compute_warp_and_error(TransformationType type,
|
||||
unsigned char *ref,
|
||||
unsigned char *frm,
|
||||
int width, int height, int stride,
|
||||
double *H);
|
||||
|
||||
|
||||
|
||||
unsigned char interpolate(unsigned char *ref, double x, double y,
|
||||
int width, int height, int stride);
|
||||
|
||||
|
||||
int_mv vp9_get_global_sb_center_mv(int col, int row, int bw, int bh,
|
||||
Global_Motion_Params *model);
|
||||
int_mv vp9_get_global_sub8x8_center_mv(int col, int row, int block,
|
||||
|
@ -24,11 +24,6 @@
|
||||
|
||||
#define THRESHOLD_NCC 0.80
|
||||
|
||||
typedef struct {
|
||||
int x, y;
|
||||
int rx, ry;
|
||||
} correspondence;
|
||||
|
||||
static double compute_variance(unsigned char *im, int stride,
|
||||
int x, int y, double *mean) {
|
||||
double sum = 0.0;
|
||||
@ -121,8 +116,8 @@ static void improve_correspondence(unsigned char *frm, unsigned char *ref,
|
||||
}
|
||||
}
|
||||
}
|
||||
correspondences[i].rx += best_x;
|
||||
correspondences[i].ry += best_y;
|
||||
correspondences[i].rx += (double) best_x;
|
||||
correspondences[i].ry += (double) best_y;
|
||||
}
|
||||
for (i = 0; i < num_correspondences; ++i) {
|
||||
double template_norm = compute_variance(
|
||||
@ -164,7 +159,7 @@ int determine_correspondence(unsigned char *frm,
|
||||
int *ref_corners, int num_ref_corners,
|
||||
int width, int height,
|
||||
int frm_stride, int ref_stride,
|
||||
int *correspondence_pts) {
|
||||
double *correspondence_pts) {
|
||||
// Debargha: Improve this to include 2-way match
|
||||
int i, j;
|
||||
correspondence *correspondences = (correspondence *)correspondence_pts;
|
||||
@ -203,18 +198,12 @@ int determine_correspondence(unsigned char *frm,
|
||||
}
|
||||
}
|
||||
if (best_match_ncc > THRESHOLD_NCC) {
|
||||
correspondences[num_correspondences].x = frm_corners[2 * i];
|
||||
correspondences[num_correspondences].y = frm_corners[2 * i + 1];
|
||||
correspondences[num_correspondences].rx = ref_corners[2 * best_match_j];
|
||||
correspondences[num_correspondences].ry =
|
||||
correspondences[num_correspondences].x = (double) frm_corners[2 * i];
|
||||
correspondences[num_correspondences].y = (double) frm_corners[2 * i + 1];
|
||||
correspondences[num_correspondences].rx = (double)
|
||||
ref_corners[2 * best_match_j];
|
||||
correspondences[num_correspondences].ry = (double)
|
||||
ref_corners[2 * best_match_j + 1];
|
||||
/*
|
||||
printf(" %d %d %d %d\n",
|
||||
correspondences[num_correspondences].x,
|
||||
correspondences[num_correspondences].y,
|
||||
correspondences[num_correspondences].rx,
|
||||
correspondences[num_correspondences].ry);
|
||||
*/
|
||||
num_correspondences++;
|
||||
}
|
||||
}
|
||||
|
@ -16,12 +16,17 @@
|
||||
#include <stdlib.h>
|
||||
#include <memory.h>
|
||||
|
||||
typedef struct {
|
||||
double x, y;
|
||||
double rx, ry;
|
||||
} correspondence;
|
||||
|
||||
int determine_correspondence(unsigned char *frm,
|
||||
int *frm_corners, int num_frm_corners,
|
||||
unsigned char *ref,
|
||||
int *ref_corners, int num_ref_corners,
|
||||
int width, int height,
|
||||
int frm_stride, int ref_stride,
|
||||
int *correspondence_pts);
|
||||
double *correspondence_pts);
|
||||
|
||||
#endif // VP9_ENCODER_VP9_CORNER_MATCH_H
|
||||
|
@ -4119,6 +4119,7 @@ static int input_fpmb_stats(FIRSTPASS_MB_STATS *firstpass_mb_stats,
|
||||
#define GLOBAL_MOTION_ADVANTAGE_THRESH_RZ 0.60
|
||||
#define GLOBAL_MOTION_ADVANTAGE_THRESH_TR 0.75
|
||||
// #define USE_BLOCK_BASED_GLOBAL_MOTION_COMPUTATION
|
||||
// #define USE_FEATURE_BASED_GLOBAL_MOTION_COMPUTATION
|
||||
|
||||
static void convert_translation_to_params(
|
||||
double *H, Global_Motion_Params *model) {
|
||||
@ -4191,7 +4192,6 @@ static void encode_frame_internal(VP9_COMP *cpi) {
|
||||
|
||||
xd->mi = cm->mi;
|
||||
xd->mi[0].src_mi = &xd->mi[0];
|
||||
|
||||
vp9_zero(cm->counts);
|
||||
vp9_zero(cpi->coef_counts);
|
||||
vp9_zero(rd_opt->comp_pred_diff);
|
||||
@ -4217,6 +4217,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
|
||||
YV12_BUFFER_CONFIG *ref_buf;
|
||||
int num, frame;
|
||||
double global_motion[9 * MAX_GLOBAL_MOTION_MODELS];
|
||||
|
||||
for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) {
|
||||
ref_buf = get_ref_frame_buffer(cpi, frame);
|
||||
if (ref_buf) {
|
||||
@ -4225,11 +4226,15 @@ static void encode_frame_internal(VP9_COMP *cpi) {
|
||||
vp9_compute_global_motion_multiple_block_based(
|
||||
cpi, GLOBAL_MOTION_MODEL, cpi->Source, ref_buf,
|
||||
BLOCK_16X16, MAX_GLOBAL_MOTION_MODELS, 0.5, global_motion))) {
|
||||
#else
|
||||
#elif defined(USE_FEATURE_BASED_GLOBAL_MOTION_COMPUTATION)
|
||||
vp9_compute_global_motion_multiple_feature_based(
|
||||
cpi, GLOBAL_MOTION_MODEL, cpi->Source, ref_buf,
|
||||
MAX_GLOBAL_MOTION_MODELS, 0.5, global_motion))) {
|
||||
#endif // USE_BLOCK_BASED_GLOBAL_MOTION_COMPUTATION
|
||||
#else
|
||||
vp9_compute_global_motion_multiple_optical_flow(
|
||||
cpi, GLOBAL_MOTION_MODEL, cpi->Source, ref_buf,
|
||||
MAX_GLOBAL_MOTION_MODELS, 0.5, global_motion))) {
|
||||
#endif
|
||||
int i;
|
||||
for (i = 0; i < num; i++) {
|
||||
convert_model_to_params(
|
||||
@ -4285,6 +4290,10 @@ static void encode_frame_internal(VP9_COMP *cpi) {
|
||||
}
|
||||
}
|
||||
}
|
||||
if (get_gmtype(&cm->global_motion[frame][i]) != GLOBAL_ZERO)
|
||||
printf("Found it %d/%d - %d [%f %f]\n",
|
||||
cm->current_video_frame, cm->show_frame, frame,
|
||||
erroradvantage, erroradvantage_trans);
|
||||
}
|
||||
}
|
||||
cm->num_global_motion[frame] = num;
|
||||
@ -4292,6 +4301,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif // CONFIG_GLOBAL_MOTION
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include "vp9/common/vp9_motion_model.h"
|
||||
#include "vp9/encoder/vp9_corner_detect.h"
|
||||
#include "vp9/encoder/vp9_corner_match.h"
|
||||
#include "vp9/encoder/vp9_opticalflow.h"
|
||||
#include "vp9/encoder/vp9_ransac.h"
|
||||
#include "vp9/encoder/vp9_global_motion.h"
|
||||
#include "vp9/encoder/vp9_motion_field.h"
|
||||
@ -36,6 +37,8 @@
|
||||
|
||||
#define MIN_INLIER_PROB 0.1
|
||||
|
||||
// #define PARAM_SEARCH
|
||||
|
||||
#define MAX_CORNERS 4096
|
||||
|
||||
INLINE ransacType get_ransacType(TransformationType type) {
|
||||
@ -92,13 +95,60 @@ static double compute_error_score(TransformationType type,
|
||||
return sqrt(sqerr / n);
|
||||
}
|
||||
|
||||
void refine_param(TransformationType type, unsigned char *frm,
|
||||
unsigned char *ref, double *H,
|
||||
int param_index, int width, int height,
|
||||
int stride, int n_refinements) {
|
||||
int i;
|
||||
double step_mse;
|
||||
double best_mse;
|
||||
double curr_mse;
|
||||
double curr_param = H[param_index];
|
||||
double step = 0.5;
|
||||
double best_param = curr_param;
|
||||
|
||||
curr_mse = compute_warp_and_error(type, ref, frm, width, height, stride, H);
|
||||
best_mse = curr_mse;
|
||||
for (i = 0; i < n_refinements; i++) {
|
||||
// look to the left
|
||||
H[param_index] = curr_param - step;
|
||||
step_mse = compute_warp_and_error(type, ref, frm, width, height, stride, H);
|
||||
if (step_mse < best_mse) {
|
||||
step /= 2;
|
||||
best_mse = step_mse;
|
||||
best_param = H[param_index];
|
||||
curr_param = best_param;
|
||||
curr_mse = step_mse;
|
||||
continue;
|
||||
}
|
||||
|
||||
// look to the right
|
||||
H[param_index] = curr_param + step;
|
||||
step_mse = compute_warp_and_error(type, ref, frm, width, height, stride, H);
|
||||
if (step_mse < best_mse) {
|
||||
step /= 2;
|
||||
best_mse = step_mse;
|
||||
best_param = H[param_index];
|
||||
curr_param = best_param;
|
||||
curr_mse = step_mse;
|
||||
continue;
|
||||
}
|
||||
|
||||
// no improvement found-> means we're either already at a minimum or
|
||||
// step is too wide
|
||||
step /= 4;
|
||||
}
|
||||
|
||||
H[param_index] = best_param;
|
||||
}
|
||||
|
||||
static int compute_global_motion_single(TransformationType type,
|
||||
int *correspondences,
|
||||
double *correspondences,
|
||||
int num_correspondences,
|
||||
double *H,
|
||||
int *inlier_map) {
|
||||
double *mp, *matched_points;
|
||||
int *cp = correspondences;
|
||||
double *cp = correspondences;
|
||||
int i, result;
|
||||
int num_inliers = 0;
|
||||
ransacType ransac = get_ransacType(type);
|
||||
@ -138,12 +188,13 @@ int vp9_compute_global_motion_single_feature_based(
|
||||
double *H) {
|
||||
int num_frm_corners, num_ref_corners;
|
||||
int num_correspondences;
|
||||
int *correspondences;
|
||||
double *correspondences;
|
||||
int num_inliers;
|
||||
int *inlier_map = NULL;
|
||||
int frm_corners[2 * MAX_CORNERS], ref_corners[2 * MAX_CORNERS];
|
||||
(void) cpi;
|
||||
|
||||
|
||||
#ifdef USE_FAST_CORNER
|
||||
num_frm_corners = FastCornerDetect(frm->y_buffer, frm->y_width,
|
||||
frm->y_height, frm->y_stride,
|
||||
@ -162,7 +213,7 @@ int vp9_compute_global_motion_single_feature_based(
|
||||
printf("Reference corners = %d\n", num_ref_corners);
|
||||
#endif
|
||||
|
||||
correspondences = (int *) malloc(num_frm_corners * 4 *
|
||||
correspondences = (double *) malloc(num_frm_corners * 4 *
|
||||
sizeof(*correspondences));
|
||||
|
||||
num_correspondences = determine_correspondence(frm->y_buffer,
|
||||
@ -201,14 +252,14 @@ int vp9_compute_global_motion_single_feature_based(
|
||||
}
|
||||
|
||||
static int compute_global_motion_multiple(TransformationType type,
|
||||
int *correspondences,
|
||||
double *correspondences,
|
||||
int num_correspondences,
|
||||
double *H,
|
||||
int max_models,
|
||||
double inlier_prob,
|
||||
int *num_models,
|
||||
int *processed_mask) {
|
||||
int *cp = correspondences;
|
||||
double *cp = correspondences;
|
||||
double *mp, *matched_points;
|
||||
int *best_inlier_mask;
|
||||
int i, result;
|
||||
@ -268,6 +319,85 @@ static int compute_global_motion_multiple(TransformationType type,
|
||||
return num_inliers_sum;
|
||||
}
|
||||
|
||||
int vp9_compute_global_motion_multiple_optical_flow(struct VP9_COMP *cpi,
|
||||
TransformationType type,
|
||||
YV12_BUFFER_CONFIG *frm,
|
||||
YV12_BUFFER_CONFIG *ref,
|
||||
int max_models,
|
||||
double inlier_prob,
|
||||
double *H) {
|
||||
int num_correspondences = 0;
|
||||
double *correspondence_pts = (double *)malloc(frm->y_width * frm->y_height *
|
||||
sizeof(correspondence));
|
||||
correspondence *correspondences = (correspondence *)correspondence_pts;
|
||||
int num_inliers;
|
||||
int i, j;
|
||||
int num_models = 0;
|
||||
int *inlier_map = NULL;
|
||||
double *u = (double *)malloc(frm->y_width * frm->y_height * sizeof(u));
|
||||
double *v = (double *)malloc(frm->y_width * frm->y_height * sizeof(v));
|
||||
double *confidence = (double *)malloc(frm->y_width * frm->y_height *
|
||||
sizeof(confidence));
|
||||
|
||||
(void) cpi;
|
||||
compute_flow(frm->y_buffer, ref->y_buffer, u, v, confidence,
|
||||
frm->y_width, frm->y_height, frm->y_stride,
|
||||
ref->y_stride, 1);
|
||||
|
||||
// avoid the boundaries because of artifacts
|
||||
for (i = 10; i < frm->y_width - 10; ++i)
|
||||
for (j = 10; j < frm->y_height - 10; ++j) {
|
||||
// Note that this threshold could still benefit from tuning. Confidence
|
||||
// is based on the inverse of the harris corner score.
|
||||
if (confidence[i + j * frm->y_width] < 0.01) {
|
||||
correspondences[num_correspondences].x = i;
|
||||
correspondences[num_correspondences].y = j;
|
||||
correspondences[num_correspondences].rx = i - u[i + j * frm->y_width];
|
||||
correspondences[num_correspondences].ry = j - v[i + j * frm->y_width];
|
||||
num_correspondences++;
|
||||
}
|
||||
}
|
||||
printf("Number of correspondences = %d\n", num_correspondences);
|
||||
#ifdef VERBOSE
|
||||
printf("Number of correspondences = %d\n", num_correspondences);
|
||||
#endif
|
||||
inlier_map = (int *)malloc(num_correspondences * sizeof(*inlier_map));
|
||||
num_inliers = compute_global_motion_multiple(type, correspondence_pts,
|
||||
num_correspondences, H,
|
||||
max_models, inlier_prob,
|
||||
&num_models, inlier_map);
|
||||
#ifdef PARAM_SEARCH
|
||||
for (j = 0; j < num_models; ++j) {
|
||||
for (i = 0; i < get_numparams(type); ++i)
|
||||
|
||||
refine_param(type, frm->y_buffer, ref->y_buffer, H,
|
||||
i + j * get_numparams(type), frm->y_width, frm->y_height,
|
||||
frm->y_stride, 2);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef VERBOSE
|
||||
printf("Models = %d, Inliers = %d\n", num_models, num_inliers);
|
||||
if (num_models)
|
||||
printf("Error Score (inliers) = %g\n",
|
||||
compute_error_score(type, correspondences, 4, correspondences + 2, 4,
|
||||
num_correspondences, H, inlier_map));
|
||||
printf("Warp error score = %g\n",
|
||||
vp9_warp_error_unq(ROTZOOM, H, ref->y_buffer, ref->y_crop_width,
|
||||
ref->y_crop_height, ref->y_stride,
|
||||
frm->y_buffer, 0, 0,
|
||||
frm->y_crop_width, frm->y_crop_height,
|
||||
frm->y_stride, 0, 0, 16, 16));
|
||||
#endif
|
||||
(void) num_inliers;
|
||||
free(correspondences);
|
||||
free(inlier_map);
|
||||
free(u);
|
||||
free(v);
|
||||
free(confidence);
|
||||
return num_models;
|
||||
}
|
||||
|
||||
// Returns number of models actually returned
|
||||
int vp9_compute_global_motion_multiple_feature_based(
|
||||
struct VP9_COMP *cpi,
|
||||
@ -279,12 +409,15 @@ int vp9_compute_global_motion_multiple_feature_based(
|
||||
double *H) {
|
||||
int num_frm_corners, num_ref_corners;
|
||||
int num_correspondences;
|
||||
int *correspondences;
|
||||
double *correspondences;
|
||||
int num_inliers;
|
||||
int i, j;
|
||||
int frm_corners[2 * MAX_CORNERS], ref_corners[2 * MAX_CORNERS];
|
||||
int num_models = 0;
|
||||
int *inlier_map = NULL;
|
||||
(void) cpi;
|
||||
(void) i;
|
||||
(void) j;
|
||||
|
||||
#ifdef USE_FAST_CORNER
|
||||
num_frm_corners = FastCornerDetect(frm->y_buffer, frm->y_width,
|
||||
@ -304,7 +437,7 @@ int vp9_compute_global_motion_multiple_feature_based(
|
||||
printf("Reference corners = %d\n", num_ref_corners);
|
||||
#endif
|
||||
|
||||
correspondences = (int *) malloc(num_frm_corners * 4 *
|
||||
correspondences = (double *) malloc(num_frm_corners * 4 *
|
||||
sizeof(*correspondences));
|
||||
|
||||
num_correspondences = determine_correspondence(frm->y_buffer,
|
||||
@ -316,6 +449,7 @@ int vp9_compute_global_motion_multiple_feature_based(
|
||||
frm->y_width, frm->y_height,
|
||||
frm->y_stride, ref->y_stride,
|
||||
correspondences);
|
||||
printf("Number of correspondences = %d\n", num_correspondences);
|
||||
#ifdef VERBOSE
|
||||
printf("Number of correspondences = %d\n", num_correspondences);
|
||||
#endif
|
||||
@ -324,6 +458,16 @@ int vp9_compute_global_motion_multiple_feature_based(
|
||||
num_correspondences, H,
|
||||
max_models, inlier_prob,
|
||||
&num_models, inlier_map);
|
||||
#ifdef PARAM_SEARCH
|
||||
for (j = 0; j < num_models; ++j) {
|
||||
for (i = 0; i < get_numparams(type); ++i)
|
||||
|
||||
refine_param(type, frm->y_buffer, ref->y_buffer, H,
|
||||
i + j * get_numparams(type), frm->y_width, frm->y_height,
|
||||
frm->y_stride, 3);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef VERBOSE
|
||||
printf("Models = %d, Inliers = %d\n", num_models, num_inliers);
|
||||
if (num_models)
|
||||
@ -352,7 +496,7 @@ int vp9_compute_global_motion_single_block_based(struct VP9_COMP *cpi,
|
||||
double *H) {
|
||||
VP9_COMMON *const cm = &cpi->common;
|
||||
int num_correspondences = 0;
|
||||
int *correspondences;
|
||||
double *correspondences;
|
||||
int num_inliers;
|
||||
int *inlier_map = NULL;
|
||||
int bwidth = num_4x4_blocks_wide_lookup[bsize] << 2;
|
||||
@ -363,7 +507,7 @@ int vp9_compute_global_motion_single_block_based(struct VP9_COMP *cpi,
|
||||
|
||||
vp9_get_frame_motionfield(cpi, frm, ref, bsize, motionfield, confidence);
|
||||
|
||||
correspondences = (int *)malloc(4 * cm->mb_rows * cm->mb_cols *
|
||||
correspondences = (double *)malloc(4 * cm->mb_rows * cm->mb_cols *
|
||||
sizeof(*correspondences));
|
||||
|
||||
for (i = 0; i < cm->mb_rows * cm->mb_cols; i ++) {
|
||||
@ -407,7 +551,7 @@ int vp9_compute_global_motion_multiple_block_based(struct VP9_COMP *cpi,
|
||||
double *H) {
|
||||
VP9_COMMON *const cm = &cpi->common;
|
||||
int num_correspondences = 0;
|
||||
int *correspondences;
|
||||
double *correspondences;
|
||||
int num_inliers;
|
||||
int num_models = 0;
|
||||
int *inlier_map = NULL;
|
||||
@ -419,7 +563,7 @@ int vp9_compute_global_motion_multiple_block_based(struct VP9_COMP *cpi,
|
||||
double confidence[4096];
|
||||
vp9_get_frame_motionfield(cpi, frm, ref, bsize, motionfield, confidence);
|
||||
|
||||
correspondences = (int *)malloc(4 * cm->mb_rows * cm->mb_cols *
|
||||
correspondences = (double *)malloc(4 * cm->mb_rows * cm->mb_cols *
|
||||
sizeof(*correspondences));
|
||||
|
||||
for (i = 0; i < cm->mb_rows * cm->mb_cols; i ++) {
|
||||
|
@ -28,6 +28,14 @@ static const int CONFIDENCE_THRESHOLD = 1.0;
|
||||
|
||||
INLINE ransacType get_ransacType(TransformationType type);
|
||||
|
||||
// Searches around each parameter and seeks to minimize MSE between
|
||||
// the warped frame produced from the set of parameters and the frame being
|
||||
// approximated.
|
||||
void refine_param(TransformationType type, unsigned char *frm,
|
||||
unsigned char *ref, double *H,
|
||||
int param_index, int width, int height,
|
||||
int stride, int n_refinements);
|
||||
|
||||
// Returns number of models actually returned: 1 - if success, 0 - if failure
|
||||
int vp9_compute_global_motion_single_feature_based(struct VP9_COMP *cpi,
|
||||
TransformationType type,
|
||||
@ -58,6 +66,15 @@ int vp9_compute_global_motion_single_block_based(struct VP9_COMP *cpi,
|
||||
BLOCK_SIZE bsize,
|
||||
double *H);
|
||||
|
||||
int vp9_compute_global_motion_multiple_optical_flow(struct VP9_COMP *cpi,
|
||||
TransformationType type,
|
||||
YV12_BUFFER_CONFIG *frm,
|
||||
YV12_BUFFER_CONFIG *ref,
|
||||
int max_models,
|
||||
double inlier_prob,
|
||||
double *H);
|
||||
|
||||
|
||||
// Returns number of models actually returned: 1+ - #models, 0 - if failure
|
||||
// max_models is the maximum number of models returned
|
||||
// inlier_prob is the probability of being inlier over all the models
|
||||
|
623
vp9/encoder/vp9_opticalflow.c
Normal file
623
vp9/encoder/vp9_opticalflow.c
Normal file
@ -0,0 +1,623 @@
|
||||
/*
|
||||
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be
|
||||
* found in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <time.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <unistd.h>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "vp9/common/vp9_motion_model.h"
|
||||
#include "vp9/encoder/vp9_opticalflow.h"
|
||||
#include "vp9/encoder/vp9_ransac.h"
|
||||
#include "vp9/encoder/vp9_resize.h"
|
||||
|
||||
// ITERATIONS is the number of iterative refinements to make on an initial
|
||||
// flow estimate. 3 seems to work well but feel free to play with this.
|
||||
#define ITERATIONS 3
|
||||
#define MAX_MEDIAN_LENGTH 121
|
||||
#define MAX_LEVELS 8
|
||||
#define MIN_LEVEL_SIZE 30
|
||||
#define MAX_ERROR 0.00001
|
||||
#define BLUR_SIZE 11
|
||||
#define BLUR_SIGMA 1.5
|
||||
#define BLUR_AMP 1
|
||||
#define MEDIAN_FILTER_SIZE 5
|
||||
#define RELATIVE_ERROR(a, b) (fabs(((a) - (b))))
|
||||
#define WRAP_PIXEL(p, size) ((p) < 0 ? abs((p)) - 1 : \
|
||||
((p) >= (size) ? (2 * (size) - 1 - (p)) : (p)))
|
||||
|
||||
// Struct for an image pyramid
|
||||
typedef struct {
|
||||
int n_levels;
|
||||
int widths[MAX_LEVELS];
|
||||
int heights[MAX_LEVELS];
|
||||
int strides[MAX_LEVELS];
|
||||
unsigned char **levels;
|
||||
} imPyramid;
|
||||
|
||||
typedef struct {
|
||||
int stride;
|
||||
double *confidence;
|
||||
double *u;
|
||||
double *v;
|
||||
} flowMV;
|
||||
|
||||
static void * safe_malloc(size_t size) {
|
||||
void *v = malloc(size);
|
||||
if (!v) {
|
||||
fprintf(stderr, "Not enough memory\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
// Produces an image pyramid where each level is resized by a
|
||||
// specified resize factor
|
||||
static void image_pyramid(unsigned char *img, imPyramid *pyr, int width,
|
||||
int height, int stride, int n_levels) {
|
||||
int i;
|
||||
int max_levels = 1;
|
||||
int scaled = width < height ? (width/MIN_LEVEL_SIZE) :
|
||||
(height/MIN_LEVEL_SIZE);
|
||||
pyr->widths[0] = width;
|
||||
pyr->heights[0] = height;
|
||||
pyr->strides[0] = stride;
|
||||
|
||||
if (n_levels == 1) {
|
||||
pyr->n_levels = 1;
|
||||
pyr->levels = (unsigned char**)safe_malloc(sizeof(*pyr->levels));
|
||||
pyr->levels[0] = img;
|
||||
return;
|
||||
}
|
||||
// compute number of possible levels of the pyramid. The smallest dim of the
|
||||
// smallest level should be no less than 30 px.
|
||||
while (scaled > 1) {
|
||||
max_levels++;
|
||||
scaled >>= 1;
|
||||
}
|
||||
|
||||
if (n_levels > max_levels)
|
||||
n_levels = max_levels;
|
||||
|
||||
pyr->n_levels = n_levels;
|
||||
pyr->levels = (unsigned char**)safe_malloc(n_levels * sizeof(*pyr->levels));
|
||||
pyr->levels[0] = img;
|
||||
|
||||
// compute each level
|
||||
for (i = 1; i < n_levels; ++i) {
|
||||
pyr->widths[i] = pyr->widths[i-1] >> 1;
|
||||
pyr->heights[i] = pyr->heights[i-1] >> 1;
|
||||
pyr->strides[i] = pyr->widths[i];
|
||||
pyr->levels[i] = (unsigned char*)safe_malloc(pyr->widths[i] *
|
||||
pyr->heights[i] *
|
||||
sizeof(*pyr->levels[i]));
|
||||
|
||||
vp9_resize_plane(pyr->levels[i-1], pyr->heights[i-1], pyr->widths[i-1],
|
||||
pyr->strides[i-1], pyr->levels[i], pyr->heights[i],
|
||||
pyr->widths[i], pyr->strides[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static void destruct_pyramid(imPyramid *pyr) {
|
||||
int i;
|
||||
|
||||
// start at level 1 because level 0 is a reference to the original image
|
||||
for (i = 1; i < pyr->n_levels; ++i)
|
||||
free(pyr->levels[i]);
|
||||
free(pyr->levels);
|
||||
}
|
||||
|
||||
// Convolution with reflected content padding to minimize edge artifacts.
|
||||
static void convolve_char(double *filter, unsigned char *image,
|
||||
double *convolved, int filt_width, int filt_height,
|
||||
int image_width, int image_height, int image_stride,
|
||||
int convolved_stride) {
|
||||
int i, j, x, y, imx, imy;
|
||||
double pixel_val;
|
||||
double filter_val;
|
||||
double image_val;
|
||||
int x_pad_size = filt_width >> 1;
|
||||
int y_pad_size = filt_height >> 1;
|
||||
for (j = 0; j < image_height; ++j)
|
||||
for (i = 0; i < image_width; ++i) {
|
||||
pixel_val = 0;
|
||||
for (y = (-y_pad_size); y<= y_pad_size; y++)
|
||||
for (x = (-x_pad_size); x <= x_pad_size; x++) {
|
||||
filter_val = filter[(x + x_pad_size) + (y + y_pad_size) * filt_width];
|
||||
imx = WRAP_PIXEL(i + x, image_width);
|
||||
imy = WRAP_PIXEL(j + y, image_height);
|
||||
image_val = image[imx + imy * image_stride];
|
||||
pixel_val += (filter_val * image_val);
|
||||
}
|
||||
convolved[i + j * convolved_stride] = pixel_val;
|
||||
}
|
||||
}
|
||||
|
||||
// Convolution with reflected content padding to minimize edge artifacts.
|
||||
static void convolve_double(double *filter, double *image, double *convolved,
|
||||
int filt_width, int filt_height, int image_width,
|
||||
int image_height, int image_stride,
|
||||
int convolved_stride) {
|
||||
int i, j, x, y, imx, imy;
|
||||
double pixel_val;
|
||||
double filter_val;
|
||||
double image_val;
|
||||
int x_pad_size = filt_width >> 1;
|
||||
int y_pad_size = filt_height >> 1;
|
||||
for (j = 0; j < image_height; ++j)
|
||||
for (i = 0; i < image_width; ++i) {
|
||||
pixel_val = 0;
|
||||
for (y = (-y_pad_size); y<= y_pad_size; y++)
|
||||
for (x = (-x_pad_size); x <= x_pad_size; x++) {
|
||||
filter_val = filter[(x + x_pad_size) + (y + y_pad_size) * filt_width];
|
||||
imx = WRAP_PIXEL(i + x, image_width);
|
||||
imy = WRAP_PIXEL(j + y, image_height);
|
||||
image_val = image[imx + imy * image_stride];
|
||||
pixel_val += (filter_val * image_val);
|
||||
}
|
||||
convolved[i + j * convolved_stride] = pixel_val;
|
||||
}
|
||||
}
|
||||
|
||||
// computes x and y spatial derivatives of an image.
|
||||
static void differentiate(double *img, int width, int height,
|
||||
int stride, double *dx_img, double *dy_img,
|
||||
int deriv_stride) {
|
||||
const double spatial_deriv_kernel[] = {-0.0833, 0.6667, 0.0,
|
||||
-0.6667, 0.0833};
|
||||
convolve_double(spatial_deriv_kernel, img, dx_img, 5, 1, width, height,
|
||||
stride, deriv_stride);
|
||||
convolve_double(spatial_deriv_kernel, img, dy_img, 1, 5, width, height,
|
||||
stride, deriv_stride);
|
||||
}
|
||||
|
||||
// creates a 2D gaussian kernel
|
||||
static void gaussian_kernel(double *mat, int size, double sig, double amp) {
|
||||
int x, y;
|
||||
double filter_val;
|
||||
int half;
|
||||
double denominator = 1 / (2.0 * sig * sig);
|
||||
double sum = 0.0f;
|
||||
|
||||
size |= 1;
|
||||
half = size >> 1;
|
||||
for (y = -half; y<= half; y++)
|
||||
for (x = -half; x<= half; x++) {
|
||||
filter_val = amp * exp((double)(-1.0 * ((x * x + y * y) * denominator)));
|
||||
mat[(x + half) + ((y + half) * size)] = filter_val;
|
||||
sum += filter_val;
|
||||
}
|
||||
|
||||
// normalize the filter
|
||||
if (sum > MAX_ERROR) {
|
||||
sum = 1 / sum;
|
||||
for (x = 0; x < size * size; x++) {
|
||||
mat[x] = mat[x] * sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// blurs image with a gaussian kernel
|
||||
static void blur_img(unsigned char *img, int width, int height,
|
||||
int stride, double *smooth_img, int smooth_stride,
|
||||
int smoothing_size, double smoothing_sig,
|
||||
double smoothing_amp) {
|
||||
double *smoothing_kernel = (double *)safe_malloc(smoothing_size *
|
||||
smoothing_size *
|
||||
sizeof(*smoothing_kernel));
|
||||
gaussian_kernel(smoothing_kernel, smoothing_size, smoothing_sig,
|
||||
smoothing_amp);
|
||||
convolve_char(smoothing_kernel, img, smooth_img, smoothing_size,
|
||||
smoothing_size, width, height, stride, smooth_stride);
|
||||
free(smoothing_kernel);
|
||||
}
|
||||
|
||||
// Implementation of Hoare's select for linear time median selection.
|
||||
// Takes in a pointer to the beginning of a list of values, the length
|
||||
// of the list, an integer representing the index of the number to be
|
||||
// selected, and an unsigned integer used to seed a random number generator.
|
||||
static double selection(double *vals, int length, int k, unsigned int seed) {
|
||||
int pivot_ind, x;
|
||||
double pivot;
|
||||
double L[MAX_MEDIAN_LENGTH], G[MAX_MEDIAN_LENGTH];
|
||||
int l = 0, e = 0, g = 0;
|
||||
pivot_ind = (int) ((length / ((double)RAND_MAX)) * rand_r(&seed));
|
||||
pivot = vals[pivot_ind];
|
||||
for (x = 0; x < length; ++x) {
|
||||
if (RELATIVE_ERROR(vals[x], pivot) <= MAX_ERROR) {
|
||||
e++;
|
||||
} else if (vals[x] < pivot) {
|
||||
L[l] = vals[x];
|
||||
l++;
|
||||
} else if (vals[x] > pivot) {
|
||||
G[g] = vals[x];
|
||||
g++;
|
||||
}
|
||||
}
|
||||
|
||||
if (k <= l)
|
||||
return selection(L, l, k, seed);
|
||||
else if (k <= l + e)
|
||||
return pivot;
|
||||
else
|
||||
return selection(G, g, k - l - e, seed);
|
||||
}
|
||||
|
||||
// Performs median filtering for denoising. This implementation uses hoare's
|
||||
// select to find the median in a block. block_x, block_y are
|
||||
// the x and y coordinates of the center of the block in the source.
|
||||
static void median_filter(double *source, double *filtered, int block_size,
|
||||
int width, int height, int source_stride,
|
||||
int filtered_stride) {
|
||||
int i, j, block_x, block_y, imx, imy;
|
||||
double pivot, val;
|
||||
int length = block_size * block_size;
|
||||
double L[MAX_MEDIAN_LENGTH], G[MAX_MEDIAN_LENGTH];
|
||||
int k = length >> 1 | 1;
|
||||
int l, e, g;
|
||||
int pad_size = block_size >> 1;
|
||||
unsigned int seed = (unsigned int)*source;
|
||||
|
||||
// find the median within each block. Reflected content is used for padding.
|
||||
for (block_y = 0; block_y < height; ++block_y)
|
||||
for (block_x = 0; block_x < width; ++block_x) {
|
||||
l = 0, e = 0, g = 0;
|
||||
memset(L, 0, length * sizeof(*L));
|
||||
memset(G, 0, length * sizeof(*G));
|
||||
pivot = source[block_x + block_y * source_stride];
|
||||
for (j = -pad_size; j <= pad_size; ++j)
|
||||
for (i = -pad_size; i <= pad_size; ++i) {
|
||||
imx = WRAP_PIXEL(i + block_x, width);
|
||||
imy = WRAP_PIXEL(j + block_y, height);
|
||||
val = source[imx + imy * source_stride];
|
||||
|
||||
// pulling out the first iteration of selection so we don't have
|
||||
// to iterate through the block to flatten it and then
|
||||
// iterate through those same values to put them into
|
||||
// the L E G bins in selection. Ugly but more efficent.
|
||||
if (RELATIVE_ERROR(val, pivot) <= MAX_ERROR) {
|
||||
e++;
|
||||
} else if (val < pivot) {
|
||||
L[l] = val;
|
||||
l++;
|
||||
} else if (val > pivot) {
|
||||
G[g] = val;
|
||||
g++;
|
||||
}
|
||||
}
|
||||
if (k <= l)
|
||||
filtered[block_x + block_y * filtered_stride] =
|
||||
selection(L, l, k, seed);
|
||||
else if (k <= l + e)
|
||||
filtered[block_x + block_y * filtered_stride] = pivot;
|
||||
else
|
||||
filtered[block_x + block_y * filtered_stride] = selection(G, g, k -
|
||||
l - e, seed);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void pointwise_matrix_sub(double *mat1, double *mat2,
|
||||
double *diff, int width, int height,
|
||||
int stride1, int stride2,
|
||||
int diffstride) {
|
||||
int i, j;
|
||||
for (j = 0; j < height; ++j)
|
||||
for (i = 0; i < width; ++i) {
|
||||
diff[i + j * diffstride] = mat1[i + j * stride1] - mat2[i + j * stride2];
|
||||
}
|
||||
}
|
||||
|
||||
static inline void pointwise_matrix_add(double *mat1, double *mat2,
|
||||
double *sum, int width, int height,
|
||||
int stride1, int stride2,
|
||||
int sumstride) {
|
||||
int i, j;
|
||||
for (j = 0; j < height; ++j)
|
||||
for (i = 0; i < width; ++i) {
|
||||
sum[i + j * sumstride] = mat1[i + j * stride1] + mat2[i + j * stride2];
|
||||
}
|
||||
}
|
||||
|
||||
// Solves lucas kanade equation at any give pixel in the image using a local
|
||||
// neighborhood for support. loc_x and loc_y are the x and y components
|
||||
// of the center of the local neighborhood. Assumes dx, dy and dt have same
|
||||
// stride. window is a wind_size x wind_size set of weights for the
|
||||
// window_size x window_size neighborhood surrounding loc_x and loc_y.
|
||||
static void optical_flow_per_pixel(double *dx, double *dy, double *dt,
|
||||
double *window, int wind_size,
|
||||
flowMV *flow, int width, int height,
|
||||
int stride, int loc_x, int loc_y) {
|
||||
int i, j, iw, jw, im_ind;
|
||||
double g = 0;
|
||||
// M and b are matrices used to solve the equation a = M^-1 * b where a
|
||||
// are the desired optical flow parameters
|
||||
double M[4] = {0, 0, 0, 0};
|
||||
double b[2] = {0, 0};
|
||||
double det = 0;
|
||||
double trace2 = 0;
|
||||
double corner_score = 0;
|
||||
int step = wind_size >> 1;
|
||||
double gdx = 0, gdy = 0;
|
||||
|
||||
for (j = loc_y - step; j <= loc_y + step; ++j)
|
||||
for (i = loc_x - step; i <= loc_x + step; ++i) {
|
||||
// if pixel is out of bounds, use reflected image content
|
||||
iw = WRAP_PIXEL(i, width);
|
||||
jw = WRAP_PIXEL(j, height);
|
||||
im_ind = iw + jw * stride;
|
||||
g = window[(i - loc_x + step) + (j - loc_y + step) * wind_size];
|
||||
gdx = g * dx[im_ind];
|
||||
gdy = g * dy[im_ind];
|
||||
M[0] += gdx * dx[im_ind];
|
||||
M[1] += gdx * dy[im_ind];
|
||||
M[3] += gdy * dy[im_ind];
|
||||
b[0] += -gdx * dt[im_ind];
|
||||
b[1] += -gdy * dt[im_ind];
|
||||
}
|
||||
M[2] = M[1];
|
||||
det = (M[0] * M[3]) - (M[1] * M[1]);
|
||||
trace2 = (M[0] + M[3]) * (M[0] + M[3]);
|
||||
if (RELATIVE_ERROR(det, 0) > MAX_ERROR) {
|
||||
const double det_inv = 1 / det;
|
||||
const double mult_b0 = det_inv * b[0];
|
||||
const double mult_b1 = det_inv * b[1];
|
||||
flow->u[loc_x + loc_y * flow->stride] = M[3] * mult_b0 - M[1] * mult_b1;
|
||||
flow->v[loc_x + loc_y * flow->stride] = -M[2] * mult_b0 + M[0] * mult_b1;
|
||||
} else {
|
||||
if (M[0] == 0 && M[3] == 0) {
|
||||
flow->u[loc_x + loc_y * flow->stride] = 0;
|
||||
flow->v[loc_x + loc_y * flow->stride] = 0;
|
||||
} else {
|
||||
const double trace2_inv = 1 / trace2;
|
||||
const double mult_b0 = trace2_inv * b[0];
|
||||
const double mult_b1 = trace2_inv * b[1];
|
||||
flow->u[loc_x + loc_y * flow->stride] = M[0] * mult_b0 + M[1] * mult_b1;
|
||||
flow->v[loc_x + loc_y * flow->stride] = M[2] * mult_b0 + M[3] * mult_b1;
|
||||
}
|
||||
}
|
||||
// compute inverse harris corner score as confidence metric
|
||||
corner_score = 1 / (det - (0.01 * trace2));
|
||||
flow->confidence[loc_x + loc_y * flow->stride] = corner_score > 2 ?
|
||||
2 : corner_score;
|
||||
}
|
||||
|
||||
// computes lucas kanade optical flow. Note that this assumes images that
|
||||
// already denoised and differentiated.
|
||||
static void lucas_kanade_base(double *smooth_frm, double *smooth_ref,
|
||||
double *dx, double *dy, double *dt, flowMV *flow,
|
||||
int width, int height, int smooth_frm_stride,
|
||||
int smooth_ref_stride, int deriv_stride) {
|
||||
int i, j;
|
||||
const int local_neighborhood_sz = 5;
|
||||
// windowing function for local neighborhood weights
|
||||
const double window[] = {0.0039, 0.0156, 0.0234, 0.0156, 0.0039,
|
||||
0.0156, 0.0625, 0.0938, 0.0625, 0.0156,
|
||||
0.0234, 0.0938, 0.1406, 0.0938, 0.0234,
|
||||
0.0156, 0.0625, 0.0938, 0.0625, 0.0156,
|
||||
0.0039, 0.0156, 0.0234, 0.0156, 0.0039};
|
||||
// compute temporal derivative
|
||||
pointwise_matrix_sub(smooth_ref, smooth_frm, dt, width, height,
|
||||
smooth_ref_stride, smooth_frm_stride, deriv_stride);
|
||||
|
||||
for (j = 0; j < height; ++j)
|
||||
for (i = 0; i < width; ++i) {
|
||||
optical_flow_per_pixel(dx, dy, dt, window, local_neighborhood_sz,
|
||||
flow, width, height, deriv_stride, i, j);
|
||||
}
|
||||
}
|
||||
|
||||
// Improves an initial approximation for the vector field by iteratively
|
||||
// warping one image towards the other.
|
||||
static void iterative_refinement(unsigned char *ref, double *smooth_frm,
|
||||
double *dx, double *dy, double *dt,
|
||||
flowMV *flow, int width, int height,
|
||||
int ref_stride, int smooth_frm_stride,
|
||||
int deriv_stride, int n_refinements) {
|
||||
int i, j, n;
|
||||
double x, y;
|
||||
unsigned char *estimate = (unsigned char*)safe_malloc(width * height *
|
||||
sizeof(*estimate));
|
||||
double *smooth_estimate = (double*)safe_malloc(width * height *
|
||||
sizeof(*smooth_estimate));
|
||||
flowMV new_flow;
|
||||
new_flow.u = (double*)safe_malloc(width * height * sizeof(*new_flow.u));
|
||||
new_flow.v = (double*)safe_malloc(width * height * sizeof(*new_flow.v));
|
||||
new_flow.confidence = (double*)safe_malloc(width * height *
|
||||
sizeof(*new_flow.confidence));
|
||||
new_flow.stride = width;
|
||||
// warp one image toward the other
|
||||
for (n = 0; n < n_refinements; ++n) {
|
||||
for (j = 0; j < height; ++j)
|
||||
for (i = 0; i < width; ++i) {
|
||||
x = i - flow->u[i + j * flow->stride];
|
||||
y = j - flow->v[i + j * flow->stride];
|
||||
estimate[i + j * width] = interpolate(ref, x, y, width,
|
||||
height, ref_stride);
|
||||
}
|
||||
|
||||
// compute flow between frame and warped estimate
|
||||
blur_img(estimate, width, height, width, smooth_estimate, width, BLUR_SIZE,
|
||||
BLUR_SIGMA, BLUR_AMP);
|
||||
lucas_kanade_base(smooth_frm, smooth_estimate, dx, dy, dt, &new_flow,
|
||||
width, height, smooth_frm_stride, width, deriv_stride);
|
||||
|
||||
// add residual and apply median filter for denoising
|
||||
pointwise_matrix_add(flow->u, new_flow.u, new_flow.u, width, height,
|
||||
flow->stride, new_flow.stride, new_flow.stride);
|
||||
pointwise_matrix_add(flow->v, new_flow.v, new_flow.v, width, height,
|
||||
flow->stride, new_flow.stride, new_flow.stride);
|
||||
median_filter(new_flow.u, flow->u, MEDIAN_FILTER_SIZE, width, height,
|
||||
new_flow.stride, flow->stride);
|
||||
median_filter(new_flow.v, flow->v, MEDIAN_FILTER_SIZE, width, height,
|
||||
new_flow.stride, flow->stride);
|
||||
median_filter(new_flow.confidence, flow->confidence, MEDIAN_FILTER_SIZE,
|
||||
width, height, new_flow.stride, flow->stride);
|
||||
}
|
||||
|
||||
free(smooth_estimate);
|
||||
free(estimate);
|
||||
free(new_flow.u);
|
||||
free(new_flow.v);
|
||||
free(new_flow.confidence);
|
||||
}
|
||||
|
||||
// interface for computing optical flow.
|
||||
void compute_flow(unsigned char *frm, unsigned char *ref,
|
||||
double *u, double *v, double *confidence,
|
||||
int width, int height, int frm_stride,
|
||||
int ref_stride, int n_levels) {
|
||||
double *smooth_ref = (double*)safe_malloc(width * height *
|
||||
sizeof(*smooth_ref));
|
||||
double *smooth_frm = (double*)safe_malloc(width * height *
|
||||
sizeof(*smooth_frm));
|
||||
double *dx_frm = (double*)safe_malloc(width * height *
|
||||
sizeof(*dx_frm));
|
||||
double *dy_frm = (double*)safe_malloc(width * height *
|
||||
sizeof(*dx_frm));
|
||||
double *dt = (double *)safe_malloc(width * height * sizeof(*dt));
|
||||
flowMV flow;
|
||||
flow.u = u;
|
||||
flow.v = v;
|
||||
flow.confidence = confidence;
|
||||
flow.stride = width;
|
||||
if (n_levels == 1) {
|
||||
// Uses lucas kanade and iterative estimation without coarse to fine.
|
||||
// This is more efficient for small motion but becomes less accurate
|
||||
// as motion gets larger.
|
||||
blur_img(ref, width, height, ref_stride, smooth_ref, width, BLUR_SIZE,
|
||||
BLUR_SIGMA, BLUR_AMP);
|
||||
blur_img(frm, width, height, frm_stride, smooth_frm, width, BLUR_SIZE,
|
||||
BLUR_SIGMA, BLUR_AMP);
|
||||
differentiate(smooth_frm, width, height, width, dx_frm, dy_frm, width);
|
||||
lucas_kanade_base(smooth_frm, smooth_ref, dx_frm, dy_frm, dt, &flow,
|
||||
width, height, width, width, width);
|
||||
iterative_refinement(ref, smooth_frm, dx_frm, dy_frm, dt, &flow, width,
|
||||
height, ref_stride, width, width, ITERATIONS);
|
||||
} else {
|
||||
int i, j, k;
|
||||
// w, h, s_r, s_f are intermediate width, height, ref stride, frame stride
|
||||
// as the resolution changes up the pyramid
|
||||
int w, h, s_r, s_f, w_upscale, h_upscale, s_upscale;
|
||||
double x, y;
|
||||
double *smooth_frm_approx = (double*)safe_malloc(width * height *
|
||||
sizeof(*smooth_frm_approx));
|
||||
double *dx_frm_approx = (double*)safe_malloc(width * height *
|
||||
sizeof(*dx_frm_approx));
|
||||
double *dy_frm_approx = (double*)safe_malloc(width * height *
|
||||
sizeof(*dy_frm_approx));
|
||||
double *u_upscale = (double*)safe_malloc(width * height *
|
||||
sizeof(*u_upscale));
|
||||
double *v_upscale = (double*)safe_malloc(width * height *
|
||||
sizeof(*v_upscale));
|
||||
unsigned char *frm_approx = (unsigned char*)safe_malloc(width * height *
|
||||
sizeof(*frm_approx));
|
||||
imPyramid *frm_pyramid = (imPyramid*)safe_malloc(sizeof(imPyramid));
|
||||
imPyramid *ref_pyramid = (imPyramid*)safe_malloc(sizeof(imPyramid));
|
||||
// create image pyramids
|
||||
image_pyramid(frm, frm_pyramid, width, height, frm_stride, n_levels);
|
||||
image_pyramid(ref, ref_pyramid, width, height, ref_stride, n_levels);
|
||||
n_levels = frm_pyramid->n_levels;
|
||||
w = frm_pyramid->widths[n_levels - 1];
|
||||
h = frm_pyramid->heights[n_levels - 1];
|
||||
s_r = ref_pyramid->strides[n_levels - 1];
|
||||
s_f = frm_pyramid->strides[n_levels - 1];
|
||||
|
||||
// for each level in the pyramid starting with the coarsest
|
||||
for (i = n_levels - 1; i >= 0; --i) {
|
||||
assert(frm_pyramid->widths[i] == ref_pyramid->widths[i]);
|
||||
assert(frm_pyramid->heights[i] == ref_pyramid->heights[i]);
|
||||
blur_img(ref_pyramid->levels[i], w, h, s_r, smooth_ref, width, BLUR_SIZE,
|
||||
BLUR_SIGMA, BLUR_AMP);
|
||||
blur_img(frm_pyramid->levels[i], w, h, s_f, smooth_frm, width, BLUR_SIZE,
|
||||
BLUR_SIGMA, BLUR_AMP);
|
||||
differentiate(smooth_frm, w, h, width, dx_frm, dy_frm, width);
|
||||
|
||||
|
||||
// Compute optical flow at this level between the reference frame and
|
||||
// the estimate produced from warping. If this is the first iteration
|
||||
// (meaning no warping has happened yet) then we have no approximation
|
||||
// for the frame and only have to worry about flow between the original
|
||||
// frame and reference. Every subsequent iteration requires computing
|
||||
// and estimate for the frame based on previously computed flow vectors.
|
||||
|
||||
if (i < n_levels - 1) {
|
||||
blur_img(frm_approx, w, h, width, smooth_frm_approx, width, BLUR_SIZE,
|
||||
BLUR_SIGMA, BLUR_AMP);
|
||||
differentiate(smooth_frm_approx, w, h, width, dx_frm_approx,
|
||||
dy_frm_approx, width);
|
||||
lucas_kanade_base(smooth_frm_approx, smooth_ref, dx_frm_approx,
|
||||
dy_frm_approx, dt, &flow, w, h, width, width, width);
|
||||
pointwise_matrix_add(flow.u, u_upscale, u_upscale, w, h, flow.stride,
|
||||
width, width);
|
||||
pointwise_matrix_add(flow.v, v_upscale, v_upscale, w, h, flow.stride,
|
||||
width, width);
|
||||
median_filter(u_upscale, flow.u, MEDIAN_FILTER_SIZE, w, h, width,
|
||||
flow.stride);
|
||||
median_filter(v_upscale, flow.v, MEDIAN_FILTER_SIZE, w, h, width,
|
||||
flow.stride);
|
||||
} else {
|
||||
lucas_kanade_base(smooth_frm, smooth_ref, dx_frm,
|
||||
dy_frm, dt, &flow, w, h, width, width, width);
|
||||
}
|
||||
iterative_refinement(ref_pyramid->levels[i], smooth_frm, dx_frm, dy_frm,
|
||||
dt, &flow, w, h, s_r, width, width, ITERATIONS);
|
||||
|
||||
|
||||
// if we're at the finest level, we're ready to return u and v
|
||||
if (i == 0) {
|
||||
assert(w == width);
|
||||
assert(h == height);
|
||||
destruct_pyramid(frm_pyramid);
|
||||
destruct_pyramid(ref_pyramid);
|
||||
free(frm_pyramid);
|
||||
free(ref_pyramid);
|
||||
free(frm_approx);
|
||||
free(smooth_frm_approx);
|
||||
free(dx_frm_approx);
|
||||
free(dy_frm_approx);
|
||||
free(u_upscale);
|
||||
free(v_upscale);
|
||||
break;
|
||||
}
|
||||
|
||||
w_upscale = ref_pyramid->widths[i - 1];
|
||||
h_upscale = ref_pyramid->heights[i - 1];
|
||||
s_upscale = ref_pyramid->strides[i - 1];
|
||||
|
||||
// warp image according to optical flow estimate
|
||||
for (j = 0; j < h_upscale; ++j)
|
||||
for (k = 0; k < w_upscale; ++k) {
|
||||
u_upscale[k + j * w_upscale] = flow.u[(int)(k >> 1) +
|
||||
(int)(j >> 1) * flow.stride];
|
||||
v_upscale[k + j * w_upscale] = flow.v[(int)(k >> 1) +
|
||||
(int)(j >> 1) * flow.stride];
|
||||
x = k - u_upscale[k + j * w_upscale];
|
||||
y = j - v_upscale[k + j * w_upscale];
|
||||
frm_approx[k + j * width] = interpolate(ref_pyramid->levels[i - 1],
|
||||
x, y, w_upscale, h_upscale,
|
||||
s_upscale);
|
||||
}
|
||||
|
||||
// assign dimensions for next level
|
||||
w = frm_pyramid->widths[i - 1];
|
||||
h = frm_pyramid->heights[i - 1];
|
||||
s_r = ref_pyramid->strides[i - 1];
|
||||
s_f = frm_pyramid->strides[i - 1];
|
||||
}
|
||||
}
|
||||
free(smooth_ref);
|
||||
free(smooth_frm);
|
||||
free(dx_frm);
|
||||
free(dy_frm);
|
||||
free(dt);
|
||||
}
|
28
vp9/encoder/vp9_opticalflow.h
Normal file
28
vp9/encoder/vp9_opticalflow.h
Normal file
@ -0,0 +1,28 @@
|
||||
/*
|
||||
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be
|
||||
* found in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
/*
|
||||
Top level interface for computing optical flow. This function
|
||||
takes in two images and returns the pixelwise motion field that
|
||||
describes the motion between the two images. n_levels corresponds
|
||||
to the number of levels of an image pyramid to use for coarse to fine
|
||||
refinement. If no coarse to fine refinement is desired, this should be
|
||||
set to 1.
|
||||
|
||||
Note that the warping needs to be done by SUBTRACTING the flow vectors
|
||||
from the locations in the second image passed in (ref) in order to produce
|
||||
a prediction for the first image passed in (frm).
|
||||
|
||||
To compute the motion vectors for a single pixel, see the function
|
||||
optical_flow_per_pixel in vp9_opticalflow.c.
|
||||
*/
|
||||
void compute_flow(unsigned char *frm, unsigned char *ref,
|
||||
double *u, double *v, double *confidence, int width,
|
||||
int height, int frm_stride, int ref_stride, int n_levels);
|
@ -46,16 +46,16 @@ static inline double PYTHAG(double a, double b) {
|
||||
}
|
||||
}
|
||||
|
||||
inline int IMIN(int a, int b) {
|
||||
int IMIN(int a, int b) {
|
||||
return (((a) < (b)) ? (a) : (b));
|
||||
}
|
||||
|
||||
inline int IMAX(int a, int b) {
|
||||
int IMAX(int a, int b) {
|
||||
return (((a) < (b)) ? (b) : (a));
|
||||
}
|
||||
|
||||
static void MultiplyMat(double *m1, double *m2, double *res,
|
||||
const int M1, const int N1, const int N2) {
|
||||
void MultiplyMat(double *m1, double *m2, double *res,
|
||||
const int M1, const int N1, const int N2) {
|
||||
int timesInner = N1;
|
||||
int timesRows = M1;
|
||||
int timesCols = N2;
|
||||
|
@ -35,4 +35,8 @@ int ransacTranslation(double *matched_points, int npoints,
|
||||
int *number_of_inliers, int *best_inlier_indices,
|
||||
double *bestH);
|
||||
|
||||
void MultiplyMat(double *m1, double *m2, double *res,
|
||||
const int M1, const int N1, const int N2);
|
||||
int PseudoInverse(double *inv, double *matx, const int M, const int N);
|
||||
|
||||
#endif // VP9_ENCODER_VP9_RANSAC_H
|
||||
|
@ -81,6 +81,8 @@ VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_global_motion.c
|
||||
VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_global_motion.h
|
||||
VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_motion_field.c
|
||||
VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_motion_field.h
|
||||
VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_opticalflow.c
|
||||
VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_opticalflow.h
|
||||
VP9_CX_SRCS-$(CONFIG_WAVELETS) += encoder/vp9_dwt.c
|
||||
VP9_CX_SRCS-$(CONFIG_WAVELETS) += encoder/vp9_dwt.h
|
||||
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.c
|
||||
|
Loading…
x
Reference in New Issue
Block a user