Optical flow for computing global motion params

Addressing comments, fixing style issues

Change-Id: I91b72ab5cdf80d68476858f442616ab3af41e709
This commit is contained in:
Sarah Parker 2015-12-02 12:24:25 -08:00
parent 9227a372d3
commit a7aab42915
12 changed files with 906 additions and 44 deletions

View File

@ -18,7 +18,7 @@
#include "vp9/common/vp9_mv.h"
#include "vp9/common/vp9_motion_model.h"
INLINE projectPointsType get_projectPointsType(TransformationType type) {
inline projectPointsType get_projectPointsType(TransformationType type) {
switch (type) {
case HOMOGRAPHY:
return projectPointsHomography;
@ -118,8 +118,8 @@ double bicubic(unsigned char *ref, double x, double y, int stride) {
return getCubicValue(arr, x - i);
}
static unsigned char interpolate(unsigned char *ref, double x, double y,
int width, int height, int stride) {
unsigned char interpolate(unsigned char *ref, double x, double y,
int width, int height, int stride) {
if (x < 0 && y < 0) return ref[0];
else if (x < 0 && y > height - 1)
return ref[(height - 1) * stride];
@ -215,6 +215,34 @@ static void WarpImage(TransformationType type, double *H,
}
}
}
// computes warp error
double compute_warp_and_error(TransformationType type,
unsigned char *ref,
unsigned char *frm,
int width, int height, int stride,
double *H) {
int i, j;
double warped;
double mse = 0;
double err = 0;
projectPointsType projectPoints = get_projectPointsType(type);
if (projectPoints == NULL) return -1.0;
for (i = 0; i < height; ++i)
for (j = 0; j < width; ++j) {
double in[2], out[2];
in[0] = j;
in[1] = i;
projectPoints(H, in, out, 1, 2, 2);
warped = interpolate(ref, out[0], out[1], width, height, stride);
err = warped - frm[j + i * stride];
mse += err * err;
}
mse /= (width * height);
return mse;
}
// Computes the ratio of the warp error to the zero motion error
double vp9_warp_erroradv_unq(TransformationType type, double *H,
unsigned char *ref,

View File

@ -49,8 +49,6 @@ static INLINE int get_numparams(TransformationType type) {
}
}
INLINE projectPointsType get_projectPointsType(TransformationType type);
void projectPointsHomography(double *mat, double *points, double *proj,
const int n, const int stride_points,
const int stride_proj);
@ -64,6 +62,8 @@ void projectPointsTranslation(double *mat, double *points, double *proj,
const int n, const int stride_points,
const int stride_proj);
projectPointsType get_projectPointsType(TransformationType type);
void vp9_convert_params_to_rotzoom(Global_Motion_Params *model, double *H);
void vp9_warp_plane(Global_Motion_Params *gm,
@ -92,6 +92,18 @@ double vp9_warp_erroradv_unq(TransformationType type, double *H,
int subsampling_col, int subsampling_row,
int x_scale, int y_scale);
double compute_warp_and_error(TransformationType type,
unsigned char *ref,
unsigned char *frm,
int width, int height, int stride,
double *H);
unsigned char interpolate(unsigned char *ref, double x, double y,
int width, int height, int stride);
int_mv vp9_get_global_sb_center_mv(int col, int row, int bw, int bh,
Global_Motion_Params *model);
int_mv vp9_get_global_sub8x8_center_mv(int col, int row, int block,

View File

@ -24,11 +24,6 @@
#define THRESHOLD_NCC 0.80
typedef struct {
int x, y;
int rx, ry;
} correspondence;
static double compute_variance(unsigned char *im, int stride,
int x, int y, double *mean) {
double sum = 0.0;
@ -121,8 +116,8 @@ static void improve_correspondence(unsigned char *frm, unsigned char *ref,
}
}
}
correspondences[i].rx += best_x;
correspondences[i].ry += best_y;
correspondences[i].rx += (double) best_x;
correspondences[i].ry += (double) best_y;
}
for (i = 0; i < num_correspondences; ++i) {
double template_norm = compute_variance(
@ -164,7 +159,7 @@ int determine_correspondence(unsigned char *frm,
int *ref_corners, int num_ref_corners,
int width, int height,
int frm_stride, int ref_stride,
int *correspondence_pts) {
double *correspondence_pts) {
// Debargha: Improve this to include 2-way match
int i, j;
correspondence *correspondences = (correspondence *)correspondence_pts;
@ -203,18 +198,12 @@ int determine_correspondence(unsigned char *frm,
}
}
if (best_match_ncc > THRESHOLD_NCC) {
correspondences[num_correspondences].x = frm_corners[2 * i];
correspondences[num_correspondences].y = frm_corners[2 * i + 1];
correspondences[num_correspondences].rx = ref_corners[2 * best_match_j];
correspondences[num_correspondences].ry =
correspondences[num_correspondences].x = (double) frm_corners[2 * i];
correspondences[num_correspondences].y = (double) frm_corners[2 * i + 1];
correspondences[num_correspondences].rx = (double)
ref_corners[2 * best_match_j];
correspondences[num_correspondences].ry = (double)
ref_corners[2 * best_match_j + 1];
/*
printf(" %d %d %d %d\n",
correspondences[num_correspondences].x,
correspondences[num_correspondences].y,
correspondences[num_correspondences].rx,
correspondences[num_correspondences].ry);
*/
num_correspondences++;
}
}

View File

@ -16,12 +16,17 @@
#include <stdlib.h>
#include <memory.h>
typedef struct {
double x, y;
double rx, ry;
} correspondence;
int determine_correspondence(unsigned char *frm,
int *frm_corners, int num_frm_corners,
unsigned char *ref,
int *ref_corners, int num_ref_corners,
int width, int height,
int frm_stride, int ref_stride,
int *correspondence_pts);
double *correspondence_pts);
#endif // VP9_ENCODER_VP9_CORNER_MATCH_H

View File

@ -4119,6 +4119,7 @@ static int input_fpmb_stats(FIRSTPASS_MB_STATS *firstpass_mb_stats,
#define GLOBAL_MOTION_ADVANTAGE_THRESH_RZ 0.60
#define GLOBAL_MOTION_ADVANTAGE_THRESH_TR 0.75
// #define USE_BLOCK_BASED_GLOBAL_MOTION_COMPUTATION
// #define USE_FEATURE_BASED_GLOBAL_MOTION_COMPUTATION
static void convert_translation_to_params(
double *H, Global_Motion_Params *model) {
@ -4191,7 +4192,6 @@ static void encode_frame_internal(VP9_COMP *cpi) {
xd->mi = cm->mi;
xd->mi[0].src_mi = &xd->mi[0];
vp9_zero(cm->counts);
vp9_zero(cpi->coef_counts);
vp9_zero(rd_opt->comp_pred_diff);
@ -4217,6 +4217,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
YV12_BUFFER_CONFIG *ref_buf;
int num, frame;
double global_motion[9 * MAX_GLOBAL_MOTION_MODELS];
for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) {
ref_buf = get_ref_frame_buffer(cpi, frame);
if (ref_buf) {
@ -4225,11 +4226,15 @@ static void encode_frame_internal(VP9_COMP *cpi) {
vp9_compute_global_motion_multiple_block_based(
cpi, GLOBAL_MOTION_MODEL, cpi->Source, ref_buf,
BLOCK_16X16, MAX_GLOBAL_MOTION_MODELS, 0.5, global_motion))) {
#else
#elif defined(USE_FEATURE_BASED_GLOBAL_MOTION_COMPUTATION)
vp9_compute_global_motion_multiple_feature_based(
cpi, GLOBAL_MOTION_MODEL, cpi->Source, ref_buf,
MAX_GLOBAL_MOTION_MODELS, 0.5, global_motion))) {
#endif // USE_BLOCK_BASED_GLOBAL_MOTION_COMPUTATION
#else
vp9_compute_global_motion_multiple_optical_flow(
cpi, GLOBAL_MOTION_MODEL, cpi->Source, ref_buf,
MAX_GLOBAL_MOTION_MODELS, 0.5, global_motion))) {
#endif
int i;
for (i = 0; i < num; i++) {
convert_model_to_params(
@ -4285,6 +4290,10 @@ static void encode_frame_internal(VP9_COMP *cpi) {
}
}
}
if (get_gmtype(&cm->global_motion[frame][i]) != GLOBAL_ZERO)
printf("Found it %d/%d - %d [%f %f]\n",
cm->current_video_frame, cm->show_frame, frame,
erroradvantage, erroradvantage_trans);
}
}
cm->num_global_motion[frame] = num;
@ -4292,6 +4301,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
}
}
}
#endif // CONFIG_GLOBAL_MOTION
#if CONFIG_VP9_HIGHBITDEPTH

View File

@ -25,6 +25,7 @@
#include "vp9/common/vp9_motion_model.h"
#include "vp9/encoder/vp9_corner_detect.h"
#include "vp9/encoder/vp9_corner_match.h"
#include "vp9/encoder/vp9_opticalflow.h"
#include "vp9/encoder/vp9_ransac.h"
#include "vp9/encoder/vp9_global_motion.h"
#include "vp9/encoder/vp9_motion_field.h"
@ -36,6 +37,8 @@
#define MIN_INLIER_PROB 0.1
// #define PARAM_SEARCH
#define MAX_CORNERS 4096
INLINE ransacType get_ransacType(TransformationType type) {
@ -92,13 +95,60 @@ static double compute_error_score(TransformationType type,
return sqrt(sqerr / n);
}
void refine_param(TransformationType type, unsigned char *frm,
unsigned char *ref, double *H,
int param_index, int width, int height,
int stride, int n_refinements) {
int i;
double step_mse;
double best_mse;
double curr_mse;
double curr_param = H[param_index];
double step = 0.5;
double best_param = curr_param;
curr_mse = compute_warp_and_error(type, ref, frm, width, height, stride, H);
best_mse = curr_mse;
for (i = 0; i < n_refinements; i++) {
// look to the left
H[param_index] = curr_param - step;
step_mse = compute_warp_and_error(type, ref, frm, width, height, stride, H);
if (step_mse < best_mse) {
step /= 2;
best_mse = step_mse;
best_param = H[param_index];
curr_param = best_param;
curr_mse = step_mse;
continue;
}
// look to the right
H[param_index] = curr_param + step;
step_mse = compute_warp_and_error(type, ref, frm, width, height, stride, H);
if (step_mse < best_mse) {
step /= 2;
best_mse = step_mse;
best_param = H[param_index];
curr_param = best_param;
curr_mse = step_mse;
continue;
}
// no improvement found-> means we're either already at a minimum or
// step is too wide
step /= 4;
}
H[param_index] = best_param;
}
static int compute_global_motion_single(TransformationType type,
int *correspondences,
double *correspondences,
int num_correspondences,
double *H,
int *inlier_map) {
double *mp, *matched_points;
int *cp = correspondences;
double *cp = correspondences;
int i, result;
int num_inliers = 0;
ransacType ransac = get_ransacType(type);
@ -138,12 +188,13 @@ int vp9_compute_global_motion_single_feature_based(
double *H) {
int num_frm_corners, num_ref_corners;
int num_correspondences;
int *correspondences;
double *correspondences;
int num_inliers;
int *inlier_map = NULL;
int frm_corners[2 * MAX_CORNERS], ref_corners[2 * MAX_CORNERS];
(void) cpi;
#ifdef USE_FAST_CORNER
num_frm_corners = FastCornerDetect(frm->y_buffer, frm->y_width,
frm->y_height, frm->y_stride,
@ -162,7 +213,7 @@ int vp9_compute_global_motion_single_feature_based(
printf("Reference corners = %d\n", num_ref_corners);
#endif
correspondences = (int *) malloc(num_frm_corners * 4 *
correspondences = (double *) malloc(num_frm_corners * 4 *
sizeof(*correspondences));
num_correspondences = determine_correspondence(frm->y_buffer,
@ -201,14 +252,14 @@ int vp9_compute_global_motion_single_feature_based(
}
static int compute_global_motion_multiple(TransformationType type,
int *correspondences,
double *correspondences,
int num_correspondences,
double *H,
int max_models,
double inlier_prob,
int *num_models,
int *processed_mask) {
int *cp = correspondences;
double *cp = correspondences;
double *mp, *matched_points;
int *best_inlier_mask;
int i, result;
@ -268,6 +319,85 @@ static int compute_global_motion_multiple(TransformationType type,
return num_inliers_sum;
}
int vp9_compute_global_motion_multiple_optical_flow(struct VP9_COMP *cpi,
TransformationType type,
YV12_BUFFER_CONFIG *frm,
YV12_BUFFER_CONFIG *ref,
int max_models,
double inlier_prob,
double *H) {
int num_correspondences = 0;
double *correspondence_pts = (double *)malloc(frm->y_width * frm->y_height *
sizeof(correspondence));
correspondence *correspondences = (correspondence *)correspondence_pts;
int num_inliers;
int i, j;
int num_models = 0;
int *inlier_map = NULL;
double *u = (double *)malloc(frm->y_width * frm->y_height * sizeof(u));
double *v = (double *)malloc(frm->y_width * frm->y_height * sizeof(v));
double *confidence = (double *)malloc(frm->y_width * frm->y_height *
sizeof(confidence));
(void) cpi;
compute_flow(frm->y_buffer, ref->y_buffer, u, v, confidence,
frm->y_width, frm->y_height, frm->y_stride,
ref->y_stride, 1);
// avoid the boundaries because of artifacts
for (i = 10; i < frm->y_width - 10; ++i)
for (j = 10; j < frm->y_height - 10; ++j) {
// Note that this threshold could still benefit from tuning. Confidence
// is based on the inverse of the harris corner score.
if (confidence[i + j * frm->y_width] < 0.01) {
correspondences[num_correspondences].x = i;
correspondences[num_correspondences].y = j;
correspondences[num_correspondences].rx = i - u[i + j * frm->y_width];
correspondences[num_correspondences].ry = j - v[i + j * frm->y_width];
num_correspondences++;
}
}
printf("Number of correspondences = %d\n", num_correspondences);
#ifdef VERBOSE
printf("Number of correspondences = %d\n", num_correspondences);
#endif
inlier_map = (int *)malloc(num_correspondences * sizeof(*inlier_map));
num_inliers = compute_global_motion_multiple(type, correspondence_pts,
num_correspondences, H,
max_models, inlier_prob,
&num_models, inlier_map);
#ifdef PARAM_SEARCH
for (j = 0; j < num_models; ++j) {
for (i = 0; i < get_numparams(type); ++i)
refine_param(type, frm->y_buffer, ref->y_buffer, H,
i + j * get_numparams(type), frm->y_width, frm->y_height,
frm->y_stride, 2);
}
#endif
#ifdef VERBOSE
printf("Models = %d, Inliers = %d\n", num_models, num_inliers);
if (num_models)
printf("Error Score (inliers) = %g\n",
compute_error_score(type, correspondences, 4, correspondences + 2, 4,
num_correspondences, H, inlier_map));
printf("Warp error score = %g\n",
vp9_warp_error_unq(ROTZOOM, H, ref->y_buffer, ref->y_crop_width,
ref->y_crop_height, ref->y_stride,
frm->y_buffer, 0, 0,
frm->y_crop_width, frm->y_crop_height,
frm->y_stride, 0, 0, 16, 16));
#endif
(void) num_inliers;
free(correspondences);
free(inlier_map);
free(u);
free(v);
free(confidence);
return num_models;
}
// Returns number of models actually returned
int vp9_compute_global_motion_multiple_feature_based(
struct VP9_COMP *cpi,
@ -279,12 +409,15 @@ int vp9_compute_global_motion_multiple_feature_based(
double *H) {
int num_frm_corners, num_ref_corners;
int num_correspondences;
int *correspondences;
double *correspondences;
int num_inliers;
int i, j;
int frm_corners[2 * MAX_CORNERS], ref_corners[2 * MAX_CORNERS];
int num_models = 0;
int *inlier_map = NULL;
(void) cpi;
(void) i;
(void) j;
#ifdef USE_FAST_CORNER
num_frm_corners = FastCornerDetect(frm->y_buffer, frm->y_width,
@ -304,7 +437,7 @@ int vp9_compute_global_motion_multiple_feature_based(
printf("Reference corners = %d\n", num_ref_corners);
#endif
correspondences = (int *) malloc(num_frm_corners * 4 *
correspondences = (double *) malloc(num_frm_corners * 4 *
sizeof(*correspondences));
num_correspondences = determine_correspondence(frm->y_buffer,
@ -316,6 +449,7 @@ int vp9_compute_global_motion_multiple_feature_based(
frm->y_width, frm->y_height,
frm->y_stride, ref->y_stride,
correspondences);
printf("Number of correspondences = %d\n", num_correspondences);
#ifdef VERBOSE
printf("Number of correspondences = %d\n", num_correspondences);
#endif
@ -324,6 +458,16 @@ int vp9_compute_global_motion_multiple_feature_based(
num_correspondences, H,
max_models, inlier_prob,
&num_models, inlier_map);
#ifdef PARAM_SEARCH
for (j = 0; j < num_models; ++j) {
for (i = 0; i < get_numparams(type); ++i)
refine_param(type, frm->y_buffer, ref->y_buffer, H,
i + j * get_numparams(type), frm->y_width, frm->y_height,
frm->y_stride, 3);
}
#endif
#ifdef VERBOSE
printf("Models = %d, Inliers = %d\n", num_models, num_inliers);
if (num_models)
@ -352,7 +496,7 @@ int vp9_compute_global_motion_single_block_based(struct VP9_COMP *cpi,
double *H) {
VP9_COMMON *const cm = &cpi->common;
int num_correspondences = 0;
int *correspondences;
double *correspondences;
int num_inliers;
int *inlier_map = NULL;
int bwidth = num_4x4_blocks_wide_lookup[bsize] << 2;
@ -363,7 +507,7 @@ int vp9_compute_global_motion_single_block_based(struct VP9_COMP *cpi,
vp9_get_frame_motionfield(cpi, frm, ref, bsize, motionfield, confidence);
correspondences = (int *)malloc(4 * cm->mb_rows * cm->mb_cols *
correspondences = (double *)malloc(4 * cm->mb_rows * cm->mb_cols *
sizeof(*correspondences));
for (i = 0; i < cm->mb_rows * cm->mb_cols; i ++) {
@ -407,7 +551,7 @@ int vp9_compute_global_motion_multiple_block_based(struct VP9_COMP *cpi,
double *H) {
VP9_COMMON *const cm = &cpi->common;
int num_correspondences = 0;
int *correspondences;
double *correspondences;
int num_inliers;
int num_models = 0;
int *inlier_map = NULL;
@ -419,7 +563,7 @@ int vp9_compute_global_motion_multiple_block_based(struct VP9_COMP *cpi,
double confidence[4096];
vp9_get_frame_motionfield(cpi, frm, ref, bsize, motionfield, confidence);
correspondences = (int *)malloc(4 * cm->mb_rows * cm->mb_cols *
correspondences = (double *)malloc(4 * cm->mb_rows * cm->mb_cols *
sizeof(*correspondences));
for (i = 0; i < cm->mb_rows * cm->mb_cols; i ++) {

View File

@ -28,6 +28,14 @@ static const int CONFIDENCE_THRESHOLD = 1.0;
INLINE ransacType get_ransacType(TransformationType type);
// Searches around each parameter and seeks to minimize MSE between
// the warped frame produced from the set of parameters and the frame being
// approximated.
void refine_param(TransformationType type, unsigned char *frm,
unsigned char *ref, double *H,
int param_index, int width, int height,
int stride, int n_refinements);
// Returns number of models actually returned: 1 - if success, 0 - if failure
int vp9_compute_global_motion_single_feature_based(struct VP9_COMP *cpi,
TransformationType type,
@ -58,6 +66,15 @@ int vp9_compute_global_motion_single_block_based(struct VP9_COMP *cpi,
BLOCK_SIZE bsize,
double *H);
int vp9_compute_global_motion_multiple_optical_flow(struct VP9_COMP *cpi,
TransformationType type,
YV12_BUFFER_CONFIG *frm,
YV12_BUFFER_CONFIG *ref,
int max_models,
double inlier_prob,
double *H);
// Returns number of models actually returned: 1+ - #models, 0 - if failure
// max_models is the maximum number of models returned
// inlier_prob is the probability of being inlier over all the models

View File

@ -0,0 +1,623 @@
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be
* found in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <unistd.h>
#include <math.h>
#include <stdio.h>
#include "vp9/common/vp9_motion_model.h"
#include "vp9/encoder/vp9_opticalflow.h"
#include "vp9/encoder/vp9_ransac.h"
#include "vp9/encoder/vp9_resize.h"
// ITERATIONS is the number of iterative refinements to make on an initial
// flow estimate. 3 seems to work well but feel free to play with this.
#define ITERATIONS 3
#define MAX_MEDIAN_LENGTH 121
#define MAX_LEVELS 8
#define MIN_LEVEL_SIZE 30
#define MAX_ERROR 0.00001
#define BLUR_SIZE 11
#define BLUR_SIGMA 1.5
#define BLUR_AMP 1
#define MEDIAN_FILTER_SIZE 5
#define RELATIVE_ERROR(a, b) (fabs(((a) - (b))))
#define WRAP_PIXEL(p, size) ((p) < 0 ? abs((p)) - 1 : \
((p) >= (size) ? (2 * (size) - 1 - (p)) : (p)))
// Struct for an image pyramid
typedef struct {
int n_levels;
int widths[MAX_LEVELS];
int heights[MAX_LEVELS];
int strides[MAX_LEVELS];
unsigned char **levels;
} imPyramid;
typedef struct {
int stride;
double *confidence;
double *u;
double *v;
} flowMV;
static void * safe_malloc(size_t size) {
void *v = malloc(size);
if (!v) {
fprintf(stderr, "Not enough memory\n");
exit(EXIT_FAILURE);
}
return v;
}
// Produces an image pyramid where each level is resized by a
// specified resize factor
static void image_pyramid(unsigned char *img, imPyramid *pyr, int width,
int height, int stride, int n_levels) {
int i;
int max_levels = 1;
int scaled = width < height ? (width/MIN_LEVEL_SIZE) :
(height/MIN_LEVEL_SIZE);
pyr->widths[0] = width;
pyr->heights[0] = height;
pyr->strides[0] = stride;
if (n_levels == 1) {
pyr->n_levels = 1;
pyr->levels = (unsigned char**)safe_malloc(sizeof(*pyr->levels));
pyr->levels[0] = img;
return;
}
// compute number of possible levels of the pyramid. The smallest dim of the
// smallest level should be no less than 30 px.
while (scaled > 1) {
max_levels++;
scaled >>= 1;
}
if (n_levels > max_levels)
n_levels = max_levels;
pyr->n_levels = n_levels;
pyr->levels = (unsigned char**)safe_malloc(n_levels * sizeof(*pyr->levels));
pyr->levels[0] = img;
// compute each level
for (i = 1; i < n_levels; ++i) {
pyr->widths[i] = pyr->widths[i-1] >> 1;
pyr->heights[i] = pyr->heights[i-1] >> 1;
pyr->strides[i] = pyr->widths[i];
pyr->levels[i] = (unsigned char*)safe_malloc(pyr->widths[i] *
pyr->heights[i] *
sizeof(*pyr->levels[i]));
vp9_resize_plane(pyr->levels[i-1], pyr->heights[i-1], pyr->widths[i-1],
pyr->strides[i-1], pyr->levels[i], pyr->heights[i],
pyr->widths[i], pyr->strides[i]);
}
}
static void destruct_pyramid(imPyramid *pyr) {
int i;
// start at level 1 because level 0 is a reference to the original image
for (i = 1; i < pyr->n_levels; ++i)
free(pyr->levels[i]);
free(pyr->levels);
}
// Convolution with reflected content padding to minimize edge artifacts.
static void convolve_char(double *filter, unsigned char *image,
double *convolved, int filt_width, int filt_height,
int image_width, int image_height, int image_stride,
int convolved_stride) {
int i, j, x, y, imx, imy;
double pixel_val;
double filter_val;
double image_val;
int x_pad_size = filt_width >> 1;
int y_pad_size = filt_height >> 1;
for (j = 0; j < image_height; ++j)
for (i = 0; i < image_width; ++i) {
pixel_val = 0;
for (y = (-y_pad_size); y<= y_pad_size; y++)
for (x = (-x_pad_size); x <= x_pad_size; x++) {
filter_val = filter[(x + x_pad_size) + (y + y_pad_size) * filt_width];
imx = WRAP_PIXEL(i + x, image_width);
imy = WRAP_PIXEL(j + y, image_height);
image_val = image[imx + imy * image_stride];
pixel_val += (filter_val * image_val);
}
convolved[i + j * convolved_stride] = pixel_val;
}
}
// Convolution with reflected content padding to minimize edge artifacts.
static void convolve_double(double *filter, double *image, double *convolved,
int filt_width, int filt_height, int image_width,
int image_height, int image_stride,
int convolved_stride) {
int i, j, x, y, imx, imy;
double pixel_val;
double filter_val;
double image_val;
int x_pad_size = filt_width >> 1;
int y_pad_size = filt_height >> 1;
for (j = 0; j < image_height; ++j)
for (i = 0; i < image_width; ++i) {
pixel_val = 0;
for (y = (-y_pad_size); y<= y_pad_size; y++)
for (x = (-x_pad_size); x <= x_pad_size; x++) {
filter_val = filter[(x + x_pad_size) + (y + y_pad_size) * filt_width];
imx = WRAP_PIXEL(i + x, image_width);
imy = WRAP_PIXEL(j + y, image_height);
image_val = image[imx + imy * image_stride];
pixel_val += (filter_val * image_val);
}
convolved[i + j * convolved_stride] = pixel_val;
}
}
// computes x and y spatial derivatives of an image.
static void differentiate(double *img, int width, int height,
int stride, double *dx_img, double *dy_img,
int deriv_stride) {
const double spatial_deriv_kernel[] = {-0.0833, 0.6667, 0.0,
-0.6667, 0.0833};
convolve_double(spatial_deriv_kernel, img, dx_img, 5, 1, width, height,
stride, deriv_stride);
convolve_double(spatial_deriv_kernel, img, dy_img, 1, 5, width, height,
stride, deriv_stride);
}
// creates a 2D gaussian kernel
static void gaussian_kernel(double *mat, int size, double sig, double amp) {
int x, y;
double filter_val;
int half;
double denominator = 1 / (2.0 * sig * sig);
double sum = 0.0f;
size |= 1;
half = size >> 1;
for (y = -half; y<= half; y++)
for (x = -half; x<= half; x++) {
filter_val = amp * exp((double)(-1.0 * ((x * x + y * y) * denominator)));
mat[(x + half) + ((y + half) * size)] = filter_val;
sum += filter_val;
}
// normalize the filter
if (sum > MAX_ERROR) {
sum = 1 / sum;
for (x = 0; x < size * size; x++) {
mat[x] = mat[x] * sum;
}
}
}
// blurs image with a gaussian kernel
static void blur_img(unsigned char *img, int width, int height,
int stride, double *smooth_img, int smooth_stride,
int smoothing_size, double smoothing_sig,
double smoothing_amp) {
double *smoothing_kernel = (double *)safe_malloc(smoothing_size *
smoothing_size *
sizeof(*smoothing_kernel));
gaussian_kernel(smoothing_kernel, smoothing_size, smoothing_sig,
smoothing_amp);
convolve_char(smoothing_kernel, img, smooth_img, smoothing_size,
smoothing_size, width, height, stride, smooth_stride);
free(smoothing_kernel);
}
// Implementation of Hoare's select for linear time median selection.
// Takes in a pointer to the beginning of a list of values, the length
// of the list, an integer representing the index of the number to be
// selected, and an unsigned integer used to seed a random number generator.
static double selection(double *vals, int length, int k, unsigned int seed) {
int pivot_ind, x;
double pivot;
double L[MAX_MEDIAN_LENGTH], G[MAX_MEDIAN_LENGTH];
int l = 0, e = 0, g = 0;
pivot_ind = (int) ((length / ((double)RAND_MAX)) * rand_r(&seed));
pivot = vals[pivot_ind];
for (x = 0; x < length; ++x) {
if (RELATIVE_ERROR(vals[x], pivot) <= MAX_ERROR) {
e++;
} else if (vals[x] < pivot) {
L[l] = vals[x];
l++;
} else if (vals[x] > pivot) {
G[g] = vals[x];
g++;
}
}
if (k <= l)
return selection(L, l, k, seed);
else if (k <= l + e)
return pivot;
else
return selection(G, g, k - l - e, seed);
}
// Performs median filtering for denoising. This implementation uses hoare's
// select to find the median in a block. block_x, block_y are
// the x and y coordinates of the center of the block in the source.
static void median_filter(double *source, double *filtered, int block_size,
int width, int height, int source_stride,
int filtered_stride) {
int i, j, block_x, block_y, imx, imy;
double pivot, val;
int length = block_size * block_size;
double L[MAX_MEDIAN_LENGTH], G[MAX_MEDIAN_LENGTH];
int k = length >> 1 | 1;
int l, e, g;
int pad_size = block_size >> 1;
unsigned int seed = (unsigned int)*source;
// find the median within each block. Reflected content is used for padding.
for (block_y = 0; block_y < height; ++block_y)
for (block_x = 0; block_x < width; ++block_x) {
l = 0, e = 0, g = 0;
memset(L, 0, length * sizeof(*L));
memset(G, 0, length * sizeof(*G));
pivot = source[block_x + block_y * source_stride];
for (j = -pad_size; j <= pad_size; ++j)
for (i = -pad_size; i <= pad_size; ++i) {
imx = WRAP_PIXEL(i + block_x, width);
imy = WRAP_PIXEL(j + block_y, height);
val = source[imx + imy * source_stride];
// pulling out the first iteration of selection so we don't have
// to iterate through the block to flatten it and then
// iterate through those same values to put them into
// the L E G bins in selection. Ugly but more efficent.
if (RELATIVE_ERROR(val, pivot) <= MAX_ERROR) {
e++;
} else if (val < pivot) {
L[l] = val;
l++;
} else if (val > pivot) {
G[g] = val;
g++;
}
}
if (k <= l)
filtered[block_x + block_y * filtered_stride] =
selection(L, l, k, seed);
else if (k <= l + e)
filtered[block_x + block_y * filtered_stride] = pivot;
else
filtered[block_x + block_y * filtered_stride] = selection(G, g, k -
l - e, seed);
}
}
static inline void pointwise_matrix_sub(double *mat1, double *mat2,
double *diff, int width, int height,
int stride1, int stride2,
int diffstride) {
int i, j;
for (j = 0; j < height; ++j)
for (i = 0; i < width; ++i) {
diff[i + j * diffstride] = mat1[i + j * stride1] - mat2[i + j * stride2];
}
}
static inline void pointwise_matrix_add(double *mat1, double *mat2,
double *sum, int width, int height,
int stride1, int stride2,
int sumstride) {
int i, j;
for (j = 0; j < height; ++j)
for (i = 0; i < width; ++i) {
sum[i + j * sumstride] = mat1[i + j * stride1] + mat2[i + j * stride2];
}
}
// Solves lucas kanade equation at any give pixel in the image using a local
// neighborhood for support. loc_x and loc_y are the x and y components
// of the center of the local neighborhood. Assumes dx, dy and dt have same
// stride. window is a wind_size x wind_size set of weights for the
// window_size x window_size neighborhood surrounding loc_x and loc_y.
static void optical_flow_per_pixel(double *dx, double *dy, double *dt,
double *window, int wind_size,
flowMV *flow, int width, int height,
int stride, int loc_x, int loc_y) {
int i, j, iw, jw, im_ind;
double g = 0;
// M and b are matrices used to solve the equation a = M^-1 * b where a
// are the desired optical flow parameters
double M[4] = {0, 0, 0, 0};
double b[2] = {0, 0};
double det = 0;
double trace2 = 0;
double corner_score = 0;
int step = wind_size >> 1;
double gdx = 0, gdy = 0;
for (j = loc_y - step; j <= loc_y + step; ++j)
for (i = loc_x - step; i <= loc_x + step; ++i) {
// if pixel is out of bounds, use reflected image content
iw = WRAP_PIXEL(i, width);
jw = WRAP_PIXEL(j, height);
im_ind = iw + jw * stride;
g = window[(i - loc_x + step) + (j - loc_y + step) * wind_size];
gdx = g * dx[im_ind];
gdy = g * dy[im_ind];
M[0] += gdx * dx[im_ind];
M[1] += gdx * dy[im_ind];
M[3] += gdy * dy[im_ind];
b[0] += -gdx * dt[im_ind];
b[1] += -gdy * dt[im_ind];
}
M[2] = M[1];
det = (M[0] * M[3]) - (M[1] * M[1]);
trace2 = (M[0] + M[3]) * (M[0] + M[3]);
if (RELATIVE_ERROR(det, 0) > MAX_ERROR) {
const double det_inv = 1 / det;
const double mult_b0 = det_inv * b[0];
const double mult_b1 = det_inv * b[1];
flow->u[loc_x + loc_y * flow->stride] = M[3] * mult_b0 - M[1] * mult_b1;
flow->v[loc_x + loc_y * flow->stride] = -M[2] * mult_b0 + M[0] * mult_b1;
} else {
if (M[0] == 0 && M[3] == 0) {
flow->u[loc_x + loc_y * flow->stride] = 0;
flow->v[loc_x + loc_y * flow->stride] = 0;
} else {
const double trace2_inv = 1 / trace2;
const double mult_b0 = trace2_inv * b[0];
const double mult_b1 = trace2_inv * b[1];
flow->u[loc_x + loc_y * flow->stride] = M[0] * mult_b0 + M[1] * mult_b1;
flow->v[loc_x + loc_y * flow->stride] = M[2] * mult_b0 + M[3] * mult_b1;
}
}
// compute inverse harris corner score as confidence metric
corner_score = 1 / (det - (0.01 * trace2));
flow->confidence[loc_x + loc_y * flow->stride] = corner_score > 2 ?
2 : corner_score;
}
// computes lucas kanade optical flow. Note that this assumes images that
// already denoised and differentiated.
static void lucas_kanade_base(double *smooth_frm, double *smooth_ref,
double *dx, double *dy, double *dt, flowMV *flow,
int width, int height, int smooth_frm_stride,
int smooth_ref_stride, int deriv_stride) {
int i, j;
const int local_neighborhood_sz = 5;
// windowing function for local neighborhood weights
const double window[] = {0.0039, 0.0156, 0.0234, 0.0156, 0.0039,
0.0156, 0.0625, 0.0938, 0.0625, 0.0156,
0.0234, 0.0938, 0.1406, 0.0938, 0.0234,
0.0156, 0.0625, 0.0938, 0.0625, 0.0156,
0.0039, 0.0156, 0.0234, 0.0156, 0.0039};
// compute temporal derivative
pointwise_matrix_sub(smooth_ref, smooth_frm, dt, width, height,
smooth_ref_stride, smooth_frm_stride, deriv_stride);
for (j = 0; j < height; ++j)
for (i = 0; i < width; ++i) {
optical_flow_per_pixel(dx, dy, dt, window, local_neighborhood_sz,
flow, width, height, deriv_stride, i, j);
}
}
// Improves an initial approximation for the vector field by iteratively
// warping one image towards the other.
static void iterative_refinement(unsigned char *ref, double *smooth_frm,
double *dx, double *dy, double *dt,
flowMV *flow, int width, int height,
int ref_stride, int smooth_frm_stride,
int deriv_stride, int n_refinements) {
int i, j, n;
double x, y;
unsigned char *estimate = (unsigned char*)safe_malloc(width * height *
sizeof(*estimate));
double *smooth_estimate = (double*)safe_malloc(width * height *
sizeof(*smooth_estimate));
flowMV new_flow;
new_flow.u = (double*)safe_malloc(width * height * sizeof(*new_flow.u));
new_flow.v = (double*)safe_malloc(width * height * sizeof(*new_flow.v));
new_flow.confidence = (double*)safe_malloc(width * height *
sizeof(*new_flow.confidence));
new_flow.stride = width;
// warp one image toward the other
for (n = 0; n < n_refinements; ++n) {
for (j = 0; j < height; ++j)
for (i = 0; i < width; ++i) {
x = i - flow->u[i + j * flow->stride];
y = j - flow->v[i + j * flow->stride];
estimate[i + j * width] = interpolate(ref, x, y, width,
height, ref_stride);
}
// compute flow between frame and warped estimate
blur_img(estimate, width, height, width, smooth_estimate, width, BLUR_SIZE,
BLUR_SIGMA, BLUR_AMP);
lucas_kanade_base(smooth_frm, smooth_estimate, dx, dy, dt, &new_flow,
width, height, smooth_frm_stride, width, deriv_stride);
// add residual and apply median filter for denoising
pointwise_matrix_add(flow->u, new_flow.u, new_flow.u, width, height,
flow->stride, new_flow.stride, new_flow.stride);
pointwise_matrix_add(flow->v, new_flow.v, new_flow.v, width, height,
flow->stride, new_flow.stride, new_flow.stride);
median_filter(new_flow.u, flow->u, MEDIAN_FILTER_SIZE, width, height,
new_flow.stride, flow->stride);
median_filter(new_flow.v, flow->v, MEDIAN_FILTER_SIZE, width, height,
new_flow.stride, flow->stride);
median_filter(new_flow.confidence, flow->confidence, MEDIAN_FILTER_SIZE,
width, height, new_flow.stride, flow->stride);
}
free(smooth_estimate);
free(estimate);
free(new_flow.u);
free(new_flow.v);
free(new_flow.confidence);
}
// interface for computing optical flow.
void compute_flow(unsigned char *frm, unsigned char *ref,
double *u, double *v, double *confidence,
int width, int height, int frm_stride,
int ref_stride, int n_levels) {
double *smooth_ref = (double*)safe_malloc(width * height *
sizeof(*smooth_ref));
double *smooth_frm = (double*)safe_malloc(width * height *
sizeof(*smooth_frm));
double *dx_frm = (double*)safe_malloc(width * height *
sizeof(*dx_frm));
double *dy_frm = (double*)safe_malloc(width * height *
sizeof(*dx_frm));
double *dt = (double *)safe_malloc(width * height * sizeof(*dt));
flowMV flow;
flow.u = u;
flow.v = v;
flow.confidence = confidence;
flow.stride = width;
if (n_levels == 1) {
// Uses lucas kanade and iterative estimation without coarse to fine.
// This is more efficient for small motion but becomes less accurate
// as motion gets larger.
blur_img(ref, width, height, ref_stride, smooth_ref, width, BLUR_SIZE,
BLUR_SIGMA, BLUR_AMP);
blur_img(frm, width, height, frm_stride, smooth_frm, width, BLUR_SIZE,
BLUR_SIGMA, BLUR_AMP);
differentiate(smooth_frm, width, height, width, dx_frm, dy_frm, width);
lucas_kanade_base(smooth_frm, smooth_ref, dx_frm, dy_frm, dt, &flow,
width, height, width, width, width);
iterative_refinement(ref, smooth_frm, dx_frm, dy_frm, dt, &flow, width,
height, ref_stride, width, width, ITERATIONS);
} else {
int i, j, k;
// w, h, s_r, s_f are intermediate width, height, ref stride, frame stride
// as the resolution changes up the pyramid
int w, h, s_r, s_f, w_upscale, h_upscale, s_upscale;
double x, y;
double *smooth_frm_approx = (double*)safe_malloc(width * height *
sizeof(*smooth_frm_approx));
double *dx_frm_approx = (double*)safe_malloc(width * height *
sizeof(*dx_frm_approx));
double *dy_frm_approx = (double*)safe_malloc(width * height *
sizeof(*dy_frm_approx));
double *u_upscale = (double*)safe_malloc(width * height *
sizeof(*u_upscale));
double *v_upscale = (double*)safe_malloc(width * height *
sizeof(*v_upscale));
unsigned char *frm_approx = (unsigned char*)safe_malloc(width * height *
sizeof(*frm_approx));
imPyramid *frm_pyramid = (imPyramid*)safe_malloc(sizeof(imPyramid));
imPyramid *ref_pyramid = (imPyramid*)safe_malloc(sizeof(imPyramid));
// create image pyramids
image_pyramid(frm, frm_pyramid, width, height, frm_stride, n_levels);
image_pyramid(ref, ref_pyramid, width, height, ref_stride, n_levels);
n_levels = frm_pyramid->n_levels;
w = frm_pyramid->widths[n_levels - 1];
h = frm_pyramid->heights[n_levels - 1];
s_r = ref_pyramid->strides[n_levels - 1];
s_f = frm_pyramid->strides[n_levels - 1];
// for each level in the pyramid starting with the coarsest
for (i = n_levels - 1; i >= 0; --i) {
assert(frm_pyramid->widths[i] == ref_pyramid->widths[i]);
assert(frm_pyramid->heights[i] == ref_pyramid->heights[i]);
blur_img(ref_pyramid->levels[i], w, h, s_r, smooth_ref, width, BLUR_SIZE,
BLUR_SIGMA, BLUR_AMP);
blur_img(frm_pyramid->levels[i], w, h, s_f, smooth_frm, width, BLUR_SIZE,
BLUR_SIGMA, BLUR_AMP);
differentiate(smooth_frm, w, h, width, dx_frm, dy_frm, width);
// Compute optical flow at this level between the reference frame and
// the estimate produced from warping. If this is the first iteration
// (meaning no warping has happened yet) then we have no approximation
// for the frame and only have to worry about flow between the original
// frame and reference. Every subsequent iteration requires computing
// and estimate for the frame based on previously computed flow vectors.
if (i < n_levels - 1) {
blur_img(frm_approx, w, h, width, smooth_frm_approx, width, BLUR_SIZE,
BLUR_SIGMA, BLUR_AMP);
differentiate(smooth_frm_approx, w, h, width, dx_frm_approx,
dy_frm_approx, width);
lucas_kanade_base(smooth_frm_approx, smooth_ref, dx_frm_approx,
dy_frm_approx, dt, &flow, w, h, width, width, width);
pointwise_matrix_add(flow.u, u_upscale, u_upscale, w, h, flow.stride,
width, width);
pointwise_matrix_add(flow.v, v_upscale, v_upscale, w, h, flow.stride,
width, width);
median_filter(u_upscale, flow.u, MEDIAN_FILTER_SIZE, w, h, width,
flow.stride);
median_filter(v_upscale, flow.v, MEDIAN_FILTER_SIZE, w, h, width,
flow.stride);
} else {
lucas_kanade_base(smooth_frm, smooth_ref, dx_frm,
dy_frm, dt, &flow, w, h, width, width, width);
}
iterative_refinement(ref_pyramid->levels[i], smooth_frm, dx_frm, dy_frm,
dt, &flow, w, h, s_r, width, width, ITERATIONS);
// if we're at the finest level, we're ready to return u and v
if (i == 0) {
assert(w == width);
assert(h == height);
destruct_pyramid(frm_pyramid);
destruct_pyramid(ref_pyramid);
free(frm_pyramid);
free(ref_pyramid);
free(frm_approx);
free(smooth_frm_approx);
free(dx_frm_approx);
free(dy_frm_approx);
free(u_upscale);
free(v_upscale);
break;
}
w_upscale = ref_pyramid->widths[i - 1];
h_upscale = ref_pyramid->heights[i - 1];
s_upscale = ref_pyramid->strides[i - 1];
// warp image according to optical flow estimate
for (j = 0; j < h_upscale; ++j)
for (k = 0; k < w_upscale; ++k) {
u_upscale[k + j * w_upscale] = flow.u[(int)(k >> 1) +
(int)(j >> 1) * flow.stride];
v_upscale[k + j * w_upscale] = flow.v[(int)(k >> 1) +
(int)(j >> 1) * flow.stride];
x = k - u_upscale[k + j * w_upscale];
y = j - v_upscale[k + j * w_upscale];
frm_approx[k + j * width] = interpolate(ref_pyramid->levels[i - 1],
x, y, w_upscale, h_upscale,
s_upscale);
}
// assign dimensions for next level
w = frm_pyramid->widths[i - 1];
h = frm_pyramid->heights[i - 1];
s_r = ref_pyramid->strides[i - 1];
s_f = frm_pyramid->strides[i - 1];
}
}
free(smooth_ref);
free(smooth_frm);
free(dx_frm);
free(dy_frm);
free(dt);
}

View File

@ -0,0 +1,28 @@
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be
* found in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
Top level interface for computing optical flow. This function
takes in two images and returns the pixelwise motion field that
describes the motion between the two images. n_levels corresponds
to the number of levels of an image pyramid to use for coarse to fine
refinement. If no coarse to fine refinement is desired, this should be
set to 1.
Note that the warping needs to be done by SUBTRACTING the flow vectors
from the locations in the second image passed in (ref) in order to produce
a prediction for the first image passed in (frm).
To compute the motion vectors for a single pixel, see the function
optical_flow_per_pixel in vp9_opticalflow.c.
*/
void compute_flow(unsigned char *frm, unsigned char *ref,
double *u, double *v, double *confidence, int width,
int height, int frm_stride, int ref_stride, int n_levels);

View File

@ -46,16 +46,16 @@ static inline double PYTHAG(double a, double b) {
}
}
inline int IMIN(int a, int b) {
int IMIN(int a, int b) {
return (((a) < (b)) ? (a) : (b));
}
inline int IMAX(int a, int b) {
int IMAX(int a, int b) {
return (((a) < (b)) ? (b) : (a));
}
static void MultiplyMat(double *m1, double *m2, double *res,
const int M1, const int N1, const int N2) {
void MultiplyMat(double *m1, double *m2, double *res,
const int M1, const int N1, const int N2) {
int timesInner = N1;
int timesRows = M1;
int timesCols = N2;

View File

@ -35,4 +35,8 @@ int ransacTranslation(double *matched_points, int npoints,
int *number_of_inliers, int *best_inlier_indices,
double *bestH);
void MultiplyMat(double *m1, double *m2, double *res,
const int M1, const int N1, const int N2);
int PseudoInverse(double *inv, double *matx, const int M, const int N);
#endif // VP9_ENCODER_VP9_RANSAC_H

View File

@ -81,6 +81,8 @@ VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_global_motion.c
VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_global_motion.h
VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_motion_field.c
VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_motion_field.h
VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_opticalflow.c
VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_opticalflow.h
VP9_CX_SRCS-$(CONFIG_WAVELETS) += encoder/vp9_dwt.c
VP9_CX_SRCS-$(CONFIG_WAVELETS) += encoder/vp9_dwt.h
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.c