vpx/vp9/encoder/vp9_opticalflow.c
Sarah Parker 7fa7f4c14a Fix comipler warnings in opticalflow.c
Change-Id: I4561c2676d8a7793ade47e1995e026ba9d521fdd
2016-04-20 17:27:13 -07:00

625 lines
25 KiB
C

/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be
* found in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <unistd.h>
#include <math.h>
#include <stdio.h>
#include "vp9/common/vp9_motion_model.h"
#include "vp9/encoder/vp9_opticalflow.h"
#include "vp9/encoder/vp9_ransac.h"
#include "vp9/encoder/vp9_resize.h"
// ITERATIONS is the number of iterative refinements to make on an initial
// flow estimate. 3 seems to work well but feel free to play with this.
#define ITERATIONS 3
#define MAX_MEDIAN_LENGTH 121
#define MAX_LEVELS 8
#define MIN_LEVEL_SIZE 30
#define MAX_ERROR 0.00001
#define BLUR_SIZE 11
#define BLUR_SIGMA 1.5
#define BLUR_AMP 1
#define MEDIAN_FILTER_SIZE 5
#define RELATIVE_ERROR(a, b) (fabs(((a) - (b))))
#define WRAP_PIXEL(p, size) ((p) < 0 ? abs((p)) - 1 : \
((p) >= (size) ? (2 * (size) - 1 - (p)) : (p)))
// Struct for an image pyramid
typedef struct {
int n_levels;
int widths[MAX_LEVELS];
int heights[MAX_LEVELS];
int strides[MAX_LEVELS];
unsigned char **levels;
} imPyramid;
typedef struct {
int stride;
double *confidence;
double *u;
double *v;
} flowMV;
static void * safe_malloc(size_t size) {
void *v = malloc(size);
if (!v) {
fprintf(stderr, "Not enough memory\n");
exit(EXIT_FAILURE);
}
return v;
}
// Produces an image pyramid where each level is resized by a
// specified resize factor
static void image_pyramid(unsigned char *img, imPyramid *pyr, int width,
int height, int stride, int n_levels) {
int i;
int max_levels = 1;
int scaled = width < height ? (width/MIN_LEVEL_SIZE) :
(height/MIN_LEVEL_SIZE);
pyr->widths[0] = width;
pyr->heights[0] = height;
pyr->strides[0] = stride;
if (n_levels == 1) {
pyr->n_levels = 1;
pyr->levels = (unsigned char**)safe_malloc(sizeof(*pyr->levels));
pyr->levels[0] = img;
return;
}
// compute number of possible levels of the pyramid. The smallest dim of the
// smallest level should be no less than 30 px.
while (scaled > 1) {
max_levels++;
scaled >>= 1;
}
if (n_levels > max_levels)
n_levels = max_levels;
pyr->n_levels = n_levels;
pyr->levels = (unsigned char**)safe_malloc(n_levels * sizeof(*pyr->levels));
pyr->levels[0] = img;
// compute each level
for (i = 1; i < n_levels; ++i) {
pyr->widths[i] = pyr->widths[i-1] >> 1;
pyr->heights[i] = pyr->heights[i-1] >> 1;
pyr->strides[i] = pyr->widths[i];
pyr->levels[i] = (unsigned char*)safe_malloc(pyr->widths[i] *
pyr->heights[i] *
sizeof(*pyr->levels[i]));
vp9_resize_plane(pyr->levels[i-1], pyr->heights[i-1], pyr->widths[i-1],
pyr->strides[i-1], pyr->levels[i], pyr->heights[i],
pyr->widths[i], pyr->strides[i]);
}
}
static void destruct_pyramid(imPyramid *pyr) {
int i;
// start at level 1 because level 0 is a reference to the original image
for (i = 1; i < pyr->n_levels; ++i)
free(pyr->levels[i]);
free(pyr->levels);
}
// Convolution with reflected content padding to minimize edge artifacts.
static void convolve_char(double *filter, unsigned char *image,
double *convolved, int filt_width, int filt_height,
int image_width, int image_height, int image_stride,
int convolved_stride) {
int i, j, x, y, imx, imy;
double pixel_val;
double filter_val;
double image_val;
int x_pad_size = filt_width >> 1;
int y_pad_size = filt_height >> 1;
for (j = 0; j < image_height; ++j)
for (i = 0; i < image_width; ++i) {
pixel_val = 0;
for (y = (-y_pad_size); y<= y_pad_size; y++)
for (x = (-x_pad_size); x <= x_pad_size; x++) {
filter_val = filter[(x + x_pad_size) + (y + y_pad_size) * filt_width];
imx = WRAP_PIXEL(i + x, image_width);
imy = WRAP_PIXEL(j + y, image_height);
image_val = image[imx + imy * image_stride];
pixel_val += (filter_val * image_val);
}
convolved[i + j * convolved_stride] = pixel_val;
}
}
// Convolution with reflected content padding to minimize edge artifacts.
static void convolve_double(const double *filter, double *image,
double *convolved, int filt_width,
int filt_height, int image_width,
int image_height, int image_stride,
int convolved_stride) {
int i, j, x, y, imx, imy;
double pixel_val;
double filter_val;
double image_val;
int x_pad_size = filt_width >> 1;
int y_pad_size = filt_height >> 1;
for (j = 0; j < image_height; ++j)
for (i = 0; i < image_width; ++i) {
pixel_val = 0;
for (y = (-y_pad_size); y<= y_pad_size; y++)
for (x = (-x_pad_size); x <= x_pad_size; x++) {
filter_val = filter[(x + x_pad_size) + (y + y_pad_size) * filt_width];
imx = WRAP_PIXEL(i + x, image_width);
imy = WRAP_PIXEL(j + y, image_height);
image_val = image[imx + imy * image_stride];
pixel_val += (filter_val * image_val);
}
convolved[i + j * convolved_stride] = pixel_val;
}
}
// computes x and y spatial derivatives of an image.
static void differentiate(double *img, int width, int height,
int stride, double *dx_img, double *dy_img,
int deriv_stride) {
const double spatial_deriv_kernel[] = {-0.0833, 0.6667, 0.0,
-0.6667, 0.0833};
convolve_double(spatial_deriv_kernel, img, dx_img, 5, 1, width, height,
stride, deriv_stride);
convolve_double(spatial_deriv_kernel, img, dy_img, 1, 5, width, height,
stride, deriv_stride);
}
// creates a 2D gaussian kernel
static void gaussian_kernel(double *mat, int size, double sig, double amp) {
int x, y;
double filter_val;
int half;
double denominator = 1 / (2.0 * sig * sig);
double sum = 0.0f;
size |= 1;
half = size >> 1;
for (y = -half; y<= half; y++)
for (x = -half; x<= half; x++) {
filter_val = amp * exp((double)(-1.0 * ((x * x + y * y) * denominator)));
mat[(x + half) + ((y + half) * size)] = filter_val;
sum += filter_val;
}
// normalize the filter
if (sum > MAX_ERROR) {
sum = 1 / sum;
for (x = 0; x < size * size; x++) {
mat[x] = mat[x] * sum;
}
}
}
// blurs image with a gaussian kernel
static void blur_img(unsigned char *img, int width, int height,
int stride, double *smooth_img, int smooth_stride,
int smoothing_size, double smoothing_sig,
double smoothing_amp) {
double *smoothing_kernel = (double *)safe_malloc(smoothing_size *
smoothing_size *
sizeof(*smoothing_kernel));
gaussian_kernel(smoothing_kernel, smoothing_size, smoothing_sig,
smoothing_amp);
convolve_char(smoothing_kernel, img, smooth_img, smoothing_size,
smoothing_size, width, height, stride, smooth_stride);
free(smoothing_kernel);
}
// Implementation of Hoare's select for linear time median selection.
// Takes in a pointer to the beginning of a list of values, the length
// of the list, an integer representing the index of the number to be
// selected, and an unsigned integer used to seed a random number generator.
static double selection(double *vals, int length, int k, unsigned int seed) {
int pivot_ind, x;
double pivot;
double L[MAX_MEDIAN_LENGTH], G[MAX_MEDIAN_LENGTH];
int l = 0, e = 0, g = 0;
pivot_ind = (int) ((length / ((double)RAND_MAX)) * rand_r(&seed));
pivot = vals[pivot_ind];
for (x = 0; x < length; ++x) {
if (RELATIVE_ERROR(vals[x], pivot) <= MAX_ERROR) {
e++;
} else if (vals[x] < pivot) {
L[l] = vals[x];
l++;
} else if (vals[x] > pivot) {
G[g] = vals[x];
g++;
}
}
if (k <= l)
return selection(L, l, k, seed);
else if (k <= l + e)
return pivot;
else
return selection(G, g, k - l - e, seed);
}
// Performs median filtering for denoising. This implementation uses hoare's
// select to find the median in a block. block_x, block_y are
// the x and y coordinates of the center of the block in the source.
static void median_filter(double *source, double *filtered, int block_size,
int width, int height, int source_stride,
int filtered_stride) {
int i, j, block_x, block_y, imx, imy;
double pivot, val;
int length = block_size * block_size;
double L[MAX_MEDIAN_LENGTH], G[MAX_MEDIAN_LENGTH];
int k = length >> 1 | 1;
int l, e, g;
int pad_size = block_size >> 1;
unsigned int seed = (unsigned int)*source;
// find the median within each block. Reflected content is used for padding.
for (block_y = 0; block_y < height; ++block_y)
for (block_x = 0; block_x < width; ++block_x) {
l = 0, e = 0, g = 0;
memset(L, 0, length * sizeof(*L));
memset(G, 0, length * sizeof(*G));
pivot = source[block_x + block_y * source_stride];
for (j = -pad_size; j <= pad_size; ++j)
for (i = -pad_size; i <= pad_size; ++i) {
imx = WRAP_PIXEL(i + block_x, width);
imy = WRAP_PIXEL(j + block_y, height);
val = source[imx + imy * source_stride];
// pulling out the first iteration of selection so we don't have
// to iterate through the block to flatten it and then
// iterate through those same values to put them into
// the L E G bins in selection. Ugly but more efficent.
if (RELATIVE_ERROR(val, pivot) <= MAX_ERROR) {
e++;
} else if (val < pivot) {
L[l] = val;
l++;
} else if (val > pivot) {
G[g] = val;
g++;
}
}
if (k <= l)
filtered[block_x + block_y * filtered_stride] =
selection(L, l, k, seed);
else if (k <= l + e)
filtered[block_x + block_y * filtered_stride] = pivot;
else
filtered[block_x + block_y * filtered_stride] = selection(G, g, k -
l - e, seed);
}
}
static inline void pointwise_matrix_sub(double *mat1, double *mat2,
double *diff, int width, int height,
int stride1, int stride2,
int diffstride) {
int i, j;
for (j = 0; j < height; ++j)
for (i = 0; i < width; ++i) {
diff[i + j * diffstride] = mat1[i + j * stride1] - mat2[i + j * stride2];
}
}
static inline void pointwise_matrix_add(double *mat1, double *mat2,
double *sum, int width, int height,
int stride1, int stride2,
int sumstride) {
int i, j;
for (j = 0; j < height; ++j)
for (i = 0; i < width; ++i) {
sum[i + j * sumstride] = mat1[i + j * stride1] + mat2[i + j * stride2];
}
}
// Solves lucas kanade equation at any give pixel in the image using a local
// neighborhood for support. loc_x and loc_y are the x and y components
// of the center of the local neighborhood. Assumes dx, dy and dt have same
// stride. window is a wind_size x wind_size set of weights for the
// window_size x window_size neighborhood surrounding loc_x and loc_y.
static void optical_flow_per_pixel(double *dx, double *dy, double *dt,
const double *window, int wind_size,
flowMV *flow, int width, int height,
int stride, int loc_x, int loc_y) {
int i, j, iw, jw, im_ind;
double g = 0;
// M and b are matrices used to solve the equation a = M^-1 * b where a
// are the desired optical flow parameters
double M[4] = {0, 0, 0, 0};
double b[2] = {0, 0};
double det = 0;
double trace2 = 0;
double corner_score = 0;
int step = wind_size >> 1;
double gdx = 0, gdy = 0;
for (j = loc_y - step; j <= loc_y + step; ++j)
for (i = loc_x - step; i <= loc_x + step; ++i) {
// if pixel is out of bounds, use reflected image content
iw = WRAP_PIXEL(i, width);
jw = WRAP_PIXEL(j, height);
im_ind = iw + jw * stride;
g = window[(i - loc_x + step) + (j - loc_y + step) * wind_size];
gdx = g * dx[im_ind];
gdy = g * dy[im_ind];
M[0] += gdx * dx[im_ind];
M[1] += gdx * dy[im_ind];
M[3] += gdy * dy[im_ind];
b[0] += -gdx * dt[im_ind];
b[1] += -gdy * dt[im_ind];
}
M[2] = M[1];
det = (M[0] * M[3]) - (M[1] * M[1]);
trace2 = (M[0] + M[3]) * (M[0] + M[3]);
if (RELATIVE_ERROR(det, 0) > MAX_ERROR) {
const double det_inv = 1 / det;
const double mult_b0 = det_inv * b[0];
const double mult_b1 = det_inv * b[1];
flow->u[loc_x + loc_y * flow->stride] = M[3] * mult_b0 - M[1] * mult_b1;
flow->v[loc_x + loc_y * flow->stride] = -M[2] * mult_b0 + M[0] * mult_b1;
} else {
if (M[0] == 0 && M[3] == 0) {
flow->u[loc_x + loc_y * flow->stride] = 0;
flow->v[loc_x + loc_y * flow->stride] = 0;
} else {
const double trace2_inv = 1 / trace2;
const double mult_b0 = trace2_inv * b[0];
const double mult_b1 = trace2_inv * b[1];
flow->u[loc_x + loc_y * flow->stride] = M[0] * mult_b0 + M[1] * mult_b1;
flow->v[loc_x + loc_y * flow->stride] = M[2] * mult_b0 + M[3] * mult_b1;
}
}
// compute inverse harris corner score as confidence metric
corner_score = 1 / (det - (0.01 * trace2));
flow->confidence[loc_x + loc_y * flow->stride] = corner_score > 2 ?
2 : corner_score;
}
// computes lucas kanade optical flow. Note that this assumes images that
// already denoised and differentiated.
static void lucas_kanade_base(double *smooth_frm, double *smooth_ref,
double *dx, double *dy, double *dt, flowMV *flow,
int width, int height, int smooth_frm_stride,
int smooth_ref_stride, int deriv_stride) {
int i, j;
const int local_neighborhood_sz = 5;
// windowing function for local neighborhood weights
const double window[] = {0.0039, 0.0156, 0.0234, 0.0156, 0.0039,
0.0156, 0.0625, 0.0938, 0.0625, 0.0156,
0.0234, 0.0938, 0.1406, 0.0938, 0.0234,
0.0156, 0.0625, 0.0938, 0.0625, 0.0156,
0.0039, 0.0156, 0.0234, 0.0156, 0.0039};
// compute temporal derivative
pointwise_matrix_sub(smooth_ref, smooth_frm, dt, width, height,
smooth_ref_stride, smooth_frm_stride, deriv_stride);
for (j = 0; j < height; ++j)
for (i = 0; i < width; ++i) {
optical_flow_per_pixel(dx, dy, dt, window, local_neighborhood_sz,
flow, width, height, deriv_stride, i, j);
}
}
// Improves an initial approximation for the vector field by iteratively
// warping one image towards the other.
static void iterative_refinement(unsigned char *ref, double *smooth_frm,
double *dx, double *dy, double *dt,
flowMV *flow, int width, int height,
int ref_stride, int smooth_frm_stride,
int deriv_stride, int n_refinements) {
int i, j, n;
double x, y;
unsigned char *estimate = (unsigned char*)safe_malloc(width * height *
sizeof(*estimate));
double *smooth_estimate = (double*)safe_malloc(width * height *
sizeof(*smooth_estimate));
flowMV new_flow;
new_flow.u = (double*)safe_malloc(width * height * sizeof(*new_flow.u));
new_flow.v = (double*)safe_malloc(width * height * sizeof(*new_flow.v));
new_flow.confidence = (double*)safe_malloc(width * height *
sizeof(*new_flow.confidence));
new_flow.stride = width;
// warp one image toward the other
for (n = 0; n < n_refinements; ++n) {
for (j = 0; j < height; ++j)
for (i = 0; i < width; ++i) {
x = i - flow->u[i + j * flow->stride];
y = j - flow->v[i + j * flow->stride];
estimate[i + j * width] = interpolate(ref, x, y, width,
height, ref_stride);
}
// compute flow between frame and warped estimate
blur_img(estimate, width, height, width, smooth_estimate, width, BLUR_SIZE,
BLUR_SIGMA, BLUR_AMP);
lucas_kanade_base(smooth_frm, smooth_estimate, dx, dy, dt, &new_flow,
width, height, smooth_frm_stride, width, deriv_stride);
// add residual and apply median filter for denoising
pointwise_matrix_add(flow->u, new_flow.u, new_flow.u, width, height,
flow->stride, new_flow.stride, new_flow.stride);
pointwise_matrix_add(flow->v, new_flow.v, new_flow.v, width, height,
flow->stride, new_flow.stride, new_flow.stride);
median_filter(new_flow.u, flow->u, MEDIAN_FILTER_SIZE, width, height,
new_flow.stride, flow->stride);
median_filter(new_flow.v, flow->v, MEDIAN_FILTER_SIZE, width, height,
new_flow.stride, flow->stride);
median_filter(new_flow.confidence, flow->confidence, MEDIAN_FILTER_SIZE,
width, height, new_flow.stride, flow->stride);
}
free(smooth_estimate);
free(estimate);
free(new_flow.u);
free(new_flow.v);
free(new_flow.confidence);
}
// interface for computing optical flow.
void compute_flow(unsigned char *frm, unsigned char *ref,
double *u, double *v, double *confidence,
int width, int height, int frm_stride,
int ref_stride, int n_levels) {
double *smooth_ref = (double*)safe_malloc(width * height *
sizeof(*smooth_ref));
double *smooth_frm = (double*)safe_malloc(width * height *
sizeof(*smooth_frm));
double *dx_frm = (double*)safe_malloc(width * height *
sizeof(*dx_frm));
double *dy_frm = (double*)safe_malloc(width * height *
sizeof(*dx_frm));
double *dt = (double *)safe_malloc(width * height * sizeof(*dt));
flowMV flow;
flow.u = u;
flow.v = v;
flow.confidence = confidence;
flow.stride = width;
if (n_levels == 1) {
// Uses lucas kanade and iterative estimation without coarse to fine.
// This is more efficient for small motion but becomes less accurate
// as motion gets larger.
blur_img(ref, width, height, ref_stride, smooth_ref, width, BLUR_SIZE,
BLUR_SIGMA, BLUR_AMP);
blur_img(frm, width, height, frm_stride, smooth_frm, width, BLUR_SIZE,
BLUR_SIGMA, BLUR_AMP);
differentiate(smooth_frm, width, height, width, dx_frm, dy_frm, width);
lucas_kanade_base(smooth_frm, smooth_ref, dx_frm, dy_frm, dt, &flow,
width, height, width, width, width);
iterative_refinement(ref, smooth_frm, dx_frm, dy_frm, dt, &flow, width,
height, ref_stride, width, width, ITERATIONS);
} else {
int i, j, k;
// w, h, s_r, s_f are intermediate width, height, ref stride, frame stride
// as the resolution changes up the pyramid
int w, h, s_r, s_f, w_upscale, h_upscale, s_upscale;
double x, y;
double *smooth_frm_approx = (double*)safe_malloc(width * height *
sizeof(*smooth_frm_approx));
double *dx_frm_approx = (double*)safe_malloc(width * height *
sizeof(*dx_frm_approx));
double *dy_frm_approx = (double*)safe_malloc(width * height *
sizeof(*dy_frm_approx));
double *u_upscale = (double*)safe_malloc(width * height *
sizeof(*u_upscale));
double *v_upscale = (double*)safe_malloc(width * height *
sizeof(*v_upscale));
unsigned char *frm_approx = (unsigned char*)safe_malloc(width * height *
sizeof(*frm_approx));
imPyramid *frm_pyramid = (imPyramid*)safe_malloc(sizeof(imPyramid));
imPyramid *ref_pyramid = (imPyramid*)safe_malloc(sizeof(imPyramid));
// create image pyramids
image_pyramid(frm, frm_pyramid, width, height, frm_stride, n_levels);
image_pyramid(ref, ref_pyramid, width, height, ref_stride, n_levels);
n_levels = frm_pyramid->n_levels;
w = frm_pyramid->widths[n_levels - 1];
h = frm_pyramid->heights[n_levels - 1];
s_r = ref_pyramid->strides[n_levels - 1];
s_f = frm_pyramid->strides[n_levels - 1];
// for each level in the pyramid starting with the coarsest
for (i = n_levels - 1; i >= 0; --i) {
assert(frm_pyramid->widths[i] == ref_pyramid->widths[i]);
assert(frm_pyramid->heights[i] == ref_pyramid->heights[i]);
blur_img(ref_pyramid->levels[i], w, h, s_r, smooth_ref, width, BLUR_SIZE,
BLUR_SIGMA, BLUR_AMP);
blur_img(frm_pyramid->levels[i], w, h, s_f, smooth_frm, width, BLUR_SIZE,
BLUR_SIGMA, BLUR_AMP);
differentiate(smooth_frm, w, h, width, dx_frm, dy_frm, width);
// Compute optical flow at this level between the reference frame and
// the estimate produced from warping. If this is the first iteration
// (meaning no warping has happened yet) then we have no approximation
// for the frame and only have to worry about flow between the original
// frame and reference. Every subsequent iteration requires computing
// and estimate for the frame based on previously computed flow vectors.
if (i < n_levels - 1) {
blur_img(frm_approx, w, h, width, smooth_frm_approx, width, BLUR_SIZE,
BLUR_SIGMA, BLUR_AMP);
differentiate(smooth_frm_approx, w, h, width, dx_frm_approx,
dy_frm_approx, width);
lucas_kanade_base(smooth_frm_approx, smooth_ref, dx_frm_approx,
dy_frm_approx, dt, &flow, w, h, width, width, width);
pointwise_matrix_add(flow.u, u_upscale, u_upscale, w, h, flow.stride,
width, width);
pointwise_matrix_add(flow.v, v_upscale, v_upscale, w, h, flow.stride,
width, width);
median_filter(u_upscale, flow.u, MEDIAN_FILTER_SIZE, w, h, width,
flow.stride);
median_filter(v_upscale, flow.v, MEDIAN_FILTER_SIZE, w, h, width,
flow.stride);
} else {
lucas_kanade_base(smooth_frm, smooth_ref, dx_frm,
dy_frm, dt, &flow, w, h, width, width, width);
}
iterative_refinement(ref_pyramid->levels[i], smooth_frm, dx_frm, dy_frm,
dt, &flow, w, h, s_r, width, width, ITERATIONS);
// if we're at the finest level, we're ready to return u and v
if (i == 0) {
assert(w == width);
assert(h == height);
destruct_pyramid(frm_pyramid);
destruct_pyramid(ref_pyramid);
free(frm_pyramid);
free(ref_pyramid);
free(frm_approx);
free(smooth_frm_approx);
free(dx_frm_approx);
free(dy_frm_approx);
free(u_upscale);
free(v_upscale);
break;
}
w_upscale = ref_pyramid->widths[i - 1];
h_upscale = ref_pyramid->heights[i - 1];
s_upscale = ref_pyramid->strides[i - 1];
// warp image according to optical flow estimate
for (j = 0; j < h_upscale; ++j)
for (k = 0; k < w_upscale; ++k) {
u_upscale[k + j * w_upscale] = flow.u[(int)(k >> 1) +
(int)(j >> 1) * flow.stride];
v_upscale[k + j * w_upscale] = flow.v[(int)(k >> 1) +
(int)(j >> 1) * flow.stride];
x = k - u_upscale[k + j * w_upscale];
y = j - v_upscale[k + j * w_upscale];
frm_approx[k + j * width] = interpolate(ref_pyramid->levels[i - 1],
x, y, w_upscale, h_upscale,
s_upscale);
}
// assign dimensions for next level
w = frm_pyramid->widths[i - 1];
h = frm_pyramid->heights[i - 1];
s_r = ref_pyramid->strides[i - 1];
s_f = frm_pyramid->strides[i - 1];
}
}
free(smooth_ref);
free(smooth_frm);
free(dx_frm);
free(dy_frm);
free(dt);
}