Optical flow for computing global motion params

Addressing comments, fixing style issues Change-Id: I91b72ab5cdf80d68476858f442616ab3af41e709
2015-12-02 12:24:25 -08:00 · 2015-12-02 12:24:25 -08:00 · a7aab42915
commit a7aab42915
parent 9227a372d3
12 changed files with 906 additions and 44 deletions
--- a/vp9/common/vp9_motion_model.c
+++ b/vp9/common/vp9_motion_model.c
@ -18,7 +18,7 @@
 #include "vp9/common/vp9_mv.h"
 #include "vp9/common/vp9_motion_model.h"

-INLINE projectPointsType get_projectPointsType(TransformationType type) {
+inline projectPointsType get_projectPointsType(TransformationType type) {
  switch (type) {
    case HOMOGRAPHY:
      return projectPointsHomography;
@ -118,8 +118,8 @@ double bicubic(unsigned char *ref, double x, double y, int stride) {
  return getCubicValue(arr, x - i);
 }

-static unsigned char interpolate(unsigned char *ref, double x, double y,
-                              int width, int height, int stride) {
+unsigned char interpolate(unsigned char *ref, double x, double y,
+                          int width, int height, int stride) {
  if (x < 0 && y < 0) return ref[0];
  else if (x < 0 && y > height - 1)
    return ref[(height - 1) * stride];
@ -215,6 +215,34 @@ static void WarpImage(TransformationType type, double *H,
    }
  }
 }
+
+// computes warp error
+double compute_warp_and_error(TransformationType type,
+                              unsigned char *ref,
+                              unsigned char *frm,
+                              int width, int height, int stride,
+                              double *H) {
+  int i, j;
+  double warped;
+  double mse = 0;
+  double err = 0;
+  projectPointsType projectPoints = get_projectPointsType(type);
+  if (projectPoints == NULL) return -1.0;
+  for (i = 0; i < height; ++i)
+    for (j = 0; j < width; ++j) {
+      double in[2], out[2];
+      in[0] = j;
+      in[1] = i;
+      projectPoints(H, in, out, 1, 2, 2);
+      warped = interpolate(ref, out[0], out[1], width, height, stride);
+      err = warped - frm[j + i * stride];
+      mse += err * err;
+    }
+
+  mse /= (width * height);
+  return mse;
+}
+
 // Computes the ratio of the warp error to the zero motion error
 double vp9_warp_erroradv_unq(TransformationType type, double *H,
                             unsigned char *ref,
--- a/vp9/common/vp9_motion_model.h
+++ b/vp9/common/vp9_motion_model.h
@ -49,8 +49,6 @@ static INLINE int get_numparams(TransformationType type) {
  }
 }

-INLINE projectPointsType get_projectPointsType(TransformationType type);
-
 void projectPointsHomography(double *mat, double *points, double *proj,
                             const int n, const int stride_points,
                             const int stride_proj);
@ -64,6 +62,8 @@ void projectPointsTranslation(double *mat, double *points, double *proj,
                              const int n, const int stride_points,
                              const int stride_proj);

+projectPointsType get_projectPointsType(TransformationType type);
+
 void vp9_convert_params_to_rotzoom(Global_Motion_Params *model, double *H);

 void vp9_warp_plane(Global_Motion_Params *gm,
@ -92,6 +92,18 @@ double vp9_warp_erroradv_unq(TransformationType type, double *H,
                             int subsampling_col, int subsampling_row,
                             int x_scale, int y_scale);

+double compute_warp_and_error(TransformationType type,
+                              unsigned char *ref,
+                              unsigned char *frm,
+                              int width, int height, int stride,
+                              double *H);
+
+
+
+unsigned char interpolate(unsigned char *ref, double x, double y,
+                          int width, int height, int stride);
+
+
 int_mv vp9_get_global_sb_center_mv(int col, int row, int bw, int bh,
                                   Global_Motion_Params *model);
 int_mv vp9_get_global_sub8x8_center_mv(int col, int row, int block,
--- a/vp9/encoder/vp9_corner_match.c
+++ b/vp9/encoder/vp9_corner_match.c
@ -24,11 +24,6 @@

 #define THRESHOLD_NCC   0.80

-typedef struct {
-  int x, y;
-  int rx, ry;
-} correspondence;
-
 static double compute_variance(unsigned char *im, int stride,
                               int x, int y, double *mean) {
  double sum = 0.0;
@ -121,8 +116,8 @@ static void improve_correspondence(unsigned char *frm, unsigned char *ref,
        }
      }
    }
-    correspondences[i].rx += best_x;
-    correspondences[i].ry += best_y;
+    correspondences[i].rx += (double) best_x;
+    correspondences[i].ry += (double) best_y;
  }
  for (i = 0; i < num_correspondences; ++i) {
    double template_norm = compute_variance(
@ -164,7 +159,7 @@ int determine_correspondence(unsigned char *frm,
                             int *ref_corners, int num_ref_corners,
                             int width, int height,
                             int frm_stride, int ref_stride,
-                             int *correspondence_pts) {
+                             double *correspondence_pts) {
  // Debargha: Improve this to include 2-way match
  int i, j;
  correspondence *correspondences = (correspondence *)correspondence_pts;
@ -203,18 +198,12 @@ int determine_correspondence(unsigned char *frm,
      }
    }
    if (best_match_ncc > THRESHOLD_NCC) {
-      correspondences[num_correspondences].x = frm_corners[2 * i];
-      correspondences[num_correspondences].y = frm_corners[2 * i + 1];
-      correspondences[num_correspondences].rx = ref_corners[2 * best_match_j];
-      correspondences[num_correspondences].ry =
+      correspondences[num_correspondences].x = (double) frm_corners[2 * i];
+      correspondences[num_correspondences].y = (double) frm_corners[2 * i + 1];
+      correspondences[num_correspondences].rx = (double)
+          ref_corners[2 * best_match_j];
+      correspondences[num_correspondences].ry = (double)
          ref_corners[2 * best_match_j + 1];
-      /*
-      printf("  %d %d %d %d\n",
-             correspondences[num_correspondences].x,
-             correspondences[num_correspondences].y,
-             correspondences[num_correspondences].rx,
-             correspondences[num_correspondences].ry);
-             */
      num_correspondences++;
    }
  }
--- a/vp9/encoder/vp9_corner_match.h
+++ b/vp9/encoder/vp9_corner_match.h
@ -16,12 +16,17 @@
 #include <stdlib.h>
 #include <memory.h>

+typedef struct {
+  double x, y;
+  double rx, ry;
+} correspondence;
+
 int determine_correspondence(unsigned char *frm,
                             int *frm_corners, int num_frm_corners,
                             unsigned char *ref,
                             int *ref_corners, int num_ref_corners,
                             int width, int height,
                             int frm_stride, int ref_stride,
-                             int *correspondence_pts);
+                             double *correspondence_pts);

 #endif  // VP9_ENCODER_VP9_CORNER_MATCH_H
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@ -4119,6 +4119,7 @@ static int input_fpmb_stats(FIRSTPASS_MB_STATS *firstpass_mb_stats,
 #define GLOBAL_MOTION_ADVANTAGE_THRESH_RZ 0.60
 #define GLOBAL_MOTION_ADVANTAGE_THRESH_TR 0.75
 // #define USE_BLOCK_BASED_GLOBAL_MOTION_COMPUTATION
+// #define USE_FEATURE_BASED_GLOBAL_MOTION_COMPUTATION

 static void convert_translation_to_params(
    double *H, Global_Motion_Params *model) {
@ -4191,7 +4192,6 @@ static void encode_frame_internal(VP9_COMP *cpi) {

  xd->mi = cm->mi;
  xd->mi[0].src_mi = &xd->mi[0];
-
  vp9_zero(cm->counts);
  vp9_zero(cpi->coef_counts);
  vp9_zero(rd_opt->comp_pred_diff);
@ -4217,6 +4217,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
    YV12_BUFFER_CONFIG *ref_buf;
    int num, frame;
    double global_motion[9 * MAX_GLOBAL_MOTION_MODELS];
+
    for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) {
      ref_buf = get_ref_frame_buffer(cpi, frame);
      if (ref_buf) {
@ -4225,11 +4226,15 @@ static void encode_frame_internal(VP9_COMP *cpi) {
             vp9_compute_global_motion_multiple_block_based(
                 cpi, GLOBAL_MOTION_MODEL, cpi->Source, ref_buf,
                 BLOCK_16X16, MAX_GLOBAL_MOTION_MODELS, 0.5, global_motion))) {
-#else
+#elif defined(USE_FEATURE_BASED_GLOBAL_MOTION_COMPUTATION)
             vp9_compute_global_motion_multiple_feature_based(
                 cpi, GLOBAL_MOTION_MODEL, cpi->Source, ref_buf,
                 MAX_GLOBAL_MOTION_MODELS, 0.5, global_motion))) {
-#endif  // USE_BLOCK_BASED_GLOBAL_MOTION_COMPUTATION
+#else
+             vp9_compute_global_motion_multiple_optical_flow(
+                 cpi, GLOBAL_MOTION_MODEL, cpi->Source, ref_buf,
+                 MAX_GLOBAL_MOTION_MODELS, 0.5, global_motion))) {
+#endif
          int i;
          for (i = 0; i < num; i++) {
            convert_model_to_params(
@ -4285,6 +4290,10 @@ static void encode_frame_internal(VP9_COMP *cpi) {
                  }
                }
              }
+              if (get_gmtype(&cm->global_motion[frame][i]) != GLOBAL_ZERO)
+                printf("Found it %d/%d - %d [%f %f]\n",
+                       cm->current_video_frame, cm->show_frame, frame,
+                       erroradvantage, erroradvantage_trans);
            }
          }
          cm->num_global_motion[frame] = num;
@ -4292,6 +4301,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
      }
    }
  }
+
 #endif  // CONFIG_GLOBAL_MOTION

 #if CONFIG_VP9_HIGHBITDEPTH
--- a/vp9/encoder/vp9_global_motion.c
+++ b/vp9/encoder/vp9_global_motion.c
@ -25,6 +25,7 @@
 #include "vp9/common/vp9_motion_model.h"
 #include "vp9/encoder/vp9_corner_detect.h"
 #include "vp9/encoder/vp9_corner_match.h"
+#include "vp9/encoder/vp9_opticalflow.h"
 #include "vp9/encoder/vp9_ransac.h"
 #include "vp9/encoder/vp9_global_motion.h"
 #include "vp9/encoder/vp9_motion_field.h"
@ -36,6 +37,8 @@

 #define MIN_INLIER_PROB 0.1

+// #define PARAM_SEARCH
+
 #define MAX_CORNERS 4096

 INLINE ransacType get_ransacType(TransformationType type) {
@ -92,13 +95,60 @@ static double compute_error_score(TransformationType type,
  return sqrt(sqerr / n);
 }

+void refine_param(TransformationType type, unsigned char *frm,
+                  unsigned char *ref,  double *H,
+                  int param_index, int width, int height,
+                  int stride, int n_refinements) {
+  int i;
+  double step_mse;
+  double best_mse;
+  double curr_mse;
+  double curr_param = H[param_index];
+  double step = 0.5;
+  double best_param = curr_param;
+
+  curr_mse = compute_warp_and_error(type, ref, frm, width, height, stride, H);
+  best_mse = curr_mse;
+  for (i = 0; i < n_refinements; i++) {
+    // look to the left
+    H[param_index] = curr_param - step;
+    step_mse = compute_warp_and_error(type, ref, frm, width, height, stride, H);
+    if (step_mse < best_mse) {
+      step /= 2;
+      best_mse = step_mse;
+      best_param = H[param_index];
+      curr_param = best_param;
+      curr_mse = step_mse;
+      continue;
+    }
+
+    // look to the right
+    H[param_index] = curr_param + step;
+    step_mse = compute_warp_and_error(type, ref, frm, width, height, stride, H);
+    if (step_mse < best_mse) {
+      step /= 2;
+      best_mse = step_mse;
+      best_param = H[param_index];
+      curr_param = best_param;
+      curr_mse = step_mse;
+      continue;
+    }
+
+    // no improvement found-> means we're either already at a minimum or
+    // step is too wide
+    step /= 4;
+  }
+
+  H[param_index] = best_param;
+}
+
 static int compute_global_motion_single(TransformationType type,
-                                        int *correspondences,
+                                        double *correspondences,
                                        int num_correspondences,
                                        double *H,
                                        int *inlier_map) {
  double *mp, *matched_points;
-  int *cp = correspondences;
+  double *cp = correspondences;
  int i, result;
  int num_inliers = 0;
  ransacType ransac = get_ransacType(type);
@ -138,12 +188,13 @@ int vp9_compute_global_motion_single_feature_based(
    double *H) {
  int num_frm_corners, num_ref_corners;
  int num_correspondences;
-  int *correspondences;
+  double *correspondences;
  int num_inliers;
  int *inlier_map = NULL;
  int frm_corners[2 * MAX_CORNERS], ref_corners[2 * MAX_CORNERS];
  (void) cpi;

+
 #ifdef USE_FAST_CORNER
  num_frm_corners = FastCornerDetect(frm->y_buffer, frm->y_width,
                                     frm->y_height, frm->y_stride,
@ -162,7 +213,7 @@ int vp9_compute_global_motion_single_feature_based(
  printf("Reference corners = %d\n", num_ref_corners);
 #endif

-  correspondences = (int *) malloc(num_frm_corners * 4 *
+  correspondences = (double *) malloc(num_frm_corners * 4 *
                                   sizeof(*correspondences));

  num_correspondences = determine_correspondence(frm->y_buffer,
@ -201,14 +252,14 @@ int vp9_compute_global_motion_single_feature_based(
 }

 static int compute_global_motion_multiple(TransformationType type,
-                                          int *correspondences,
+                                          double *correspondences,
                                          int num_correspondences,
                                          double *H,
                                          int max_models,
                                          double inlier_prob,
                                          int *num_models,
                                          int *processed_mask) {
-  int *cp = correspondences;
+  double *cp = correspondences;
  double *mp, *matched_points;
  int *best_inlier_mask;
  int i, result;
@ -268,6 +319,85 @@ static int compute_global_motion_multiple(TransformationType type,
  return num_inliers_sum;
 }

+int vp9_compute_global_motion_multiple_optical_flow(struct VP9_COMP *cpi,
+                                                    TransformationType type,
+                                                    YV12_BUFFER_CONFIG *frm,
+                                                    YV12_BUFFER_CONFIG *ref,
+                                                    int max_models,
+                                                    double inlier_prob,
+                                                    double *H) {
+  int num_correspondences = 0;
+  double *correspondence_pts = (double *)malloc(frm->y_width * frm->y_height *
+                                                sizeof(correspondence));
+  correspondence *correspondences = (correspondence *)correspondence_pts;
+  int num_inliers;
+  int i, j;
+  int num_models = 0;
+  int *inlier_map = NULL;
+  double *u = (double *)malloc(frm->y_width * frm->y_height * sizeof(u));
+  double *v = (double *)malloc(frm->y_width * frm->y_height * sizeof(v));
+  double *confidence = (double *)malloc(frm->y_width * frm->y_height *
+                                        sizeof(confidence));
+
+  (void) cpi;
+  compute_flow(frm->y_buffer, ref->y_buffer, u, v, confidence,
+               frm->y_width, frm->y_height, frm->y_stride,
+               ref->y_stride, 1);
+
+  // avoid the boundaries because of artifacts
+  for (i = 10; i < frm->y_width - 10; ++i)
+    for (j = 10; j < frm->y_height - 10; ++j) {
+      // Note that this threshold could still benefit from tuning. Confidence
+      // is based on the inverse of the harris corner score.
+      if (confidence[i + j * frm->y_width] < 0.01) {
+        correspondences[num_correspondences].x = i;
+        correspondences[num_correspondences].y = j;
+        correspondences[num_correspondences].rx = i - u[i + j * frm->y_width];
+        correspondences[num_correspondences].ry = j - v[i + j * frm->y_width];
+        num_correspondences++;
+      }
+    }
+  printf("Number of correspondences = %d\n", num_correspondences);
+#ifdef VERBOSE
+  printf("Number of correspondences = %d\n", num_correspondences);
+#endif
+  inlier_map = (int *)malloc(num_correspondences * sizeof(*inlier_map));
+  num_inliers = compute_global_motion_multiple(type, correspondence_pts,
+                                               num_correspondences, H,
+                                               max_models, inlier_prob,
+                                               &num_models, inlier_map);
+#ifdef PARAM_SEARCH
+  for (j = 0; j < num_models; ++j) {
+    for (i = 0; i < get_numparams(type); ++i)
+
+      refine_param(type, frm->y_buffer, ref->y_buffer, H,
+                  i + j * get_numparams(type), frm->y_width, frm->y_height,
+                  frm->y_stride, 2);
+  }
+#endif
+
+#ifdef VERBOSE
+  printf("Models = %d, Inliers = %d\n", num_models, num_inliers);
+  if (num_models)
+    printf("Error Score (inliers) = %g\n",
+           compute_error_score(type, correspondences, 4, correspondences + 2, 4,
+                               num_correspondences, H, inlier_map));
+    printf("Warp error score = %g\n",
+           vp9_warp_error_unq(ROTZOOM, H, ref->y_buffer, ref->y_crop_width,
+                              ref->y_crop_height, ref->y_stride,
+                              frm->y_buffer, 0, 0,
+                              frm->y_crop_width, frm->y_crop_height,
+                              frm->y_stride, 0, 0, 16, 16));
+#endif
+  (void) num_inliers;
+  free(correspondences);
+  free(inlier_map);
+  free(u);
+  free(v);
+  free(confidence);
+  return num_models;
+}
+
 // Returns number of models actually returned
 int vp9_compute_global_motion_multiple_feature_based(
    struct VP9_COMP *cpi,
@ -279,12 +409,15 @@ int vp9_compute_global_motion_multiple_feature_based(
    double *H) {
  int num_frm_corners, num_ref_corners;
  int num_correspondences;
-  int *correspondences;
+  double *correspondences;
  int num_inliers;
+  int i, j;
  int frm_corners[2 * MAX_CORNERS], ref_corners[2 * MAX_CORNERS];
  int num_models = 0;
  int *inlier_map = NULL;
  (void) cpi;
+  (void) i;
+  (void) j;

 #ifdef USE_FAST_CORNER
  num_frm_corners = FastCornerDetect(frm->y_buffer, frm->y_width,
@ -304,7 +437,7 @@ int vp9_compute_global_motion_multiple_feature_based(
  printf("Reference corners = %d\n", num_ref_corners);
 #endif

-  correspondences = (int *) malloc(num_frm_corners * 4 *
+  correspondences = (double *) malloc(num_frm_corners * 4 *
                                   sizeof(*correspondences));

  num_correspondences = determine_correspondence(frm->y_buffer,
@ -316,6 +449,7 @@ int vp9_compute_global_motion_multiple_feature_based(
                                                 frm->y_width, frm->y_height,
                                                 frm->y_stride, ref->y_stride,
                                                 correspondences);
+  printf("Number of correspondences = %d\n", num_correspondences);
 #ifdef VERBOSE
  printf("Number of correspondences = %d\n", num_correspondences);
 #endif
@ -324,6 +458,16 @@ int vp9_compute_global_motion_multiple_feature_based(
                                               num_correspondences, H,
                                               max_models, inlier_prob,
                                               &num_models, inlier_map);
+#ifdef PARAM_SEARCH
+  for (j = 0; j < num_models; ++j) {
+    for (i = 0; i < get_numparams(type); ++i)
+
+      refine_param(type, frm->y_buffer, ref->y_buffer, H,
+                   i + j * get_numparams(type), frm->y_width, frm->y_height,
+                   frm->y_stride, 3);
+  }
+#endif
+
 #ifdef VERBOSE
  printf("Models = %d, Inliers = %d\n", num_models, num_inliers);
  if (num_models)
@ -352,7 +496,7 @@ int vp9_compute_global_motion_single_block_based(struct VP9_COMP *cpi,
                                                 double *H) {
  VP9_COMMON *const cm = &cpi->common;
  int num_correspondences = 0;
-  int *correspondences;
+  double *correspondences;
  int num_inliers;
  int *inlier_map = NULL;
  int bwidth = num_4x4_blocks_wide_lookup[bsize] << 2;
@ -363,7 +507,7 @@ int vp9_compute_global_motion_single_block_based(struct VP9_COMP *cpi,

  vp9_get_frame_motionfield(cpi, frm, ref, bsize, motionfield, confidence);

-  correspondences = (int *)malloc(4 * cm->mb_rows * cm->mb_cols *
+  correspondences = (double *)malloc(4 * cm->mb_rows * cm->mb_cols *
                                  sizeof(*correspondences));

  for (i = 0; i < cm->mb_rows * cm->mb_cols; i ++) {
@ -407,7 +551,7 @@ int vp9_compute_global_motion_multiple_block_based(struct VP9_COMP *cpi,
                                                   double *H) {
  VP9_COMMON *const cm = &cpi->common;
  int num_correspondences = 0;
-  int *correspondences;
+  double *correspondences;
  int num_inliers;
  int num_models = 0;
  int *inlier_map = NULL;
@ -419,7 +563,7 @@ int vp9_compute_global_motion_multiple_block_based(struct VP9_COMP *cpi,
  double confidence[4096];
  vp9_get_frame_motionfield(cpi, frm, ref, bsize, motionfield, confidence);

-  correspondences = (int *)malloc(4 * cm->mb_rows * cm->mb_cols *
+  correspondences = (double *)malloc(4 * cm->mb_rows * cm->mb_cols *
                                  sizeof(*correspondences));

  for (i = 0; i < cm->mb_rows * cm->mb_cols; i ++) {
--- a/vp9/encoder/vp9_global_motion.h
+++ b/vp9/encoder/vp9_global_motion.h
@ -28,6 +28,14 @@ static const int CONFIDENCE_THRESHOLD = 1.0;

 INLINE ransacType get_ransacType(TransformationType type);

+// Searches around each parameter and seeks to minimize MSE  between
+// the warped frame produced from the set of parameters and the frame being
+// approximated.
+void refine_param(TransformationType type, unsigned char *frm,
+                  unsigned char *ref, double *H,
+                  int param_index, int width, int height,
+                  int stride, int n_refinements);
+
 // Returns number of models actually returned: 1 - if success, 0 - if failure
 int vp9_compute_global_motion_single_feature_based(struct VP9_COMP *cpi,
                                                   TransformationType type,
@ -58,6 +66,15 @@ int vp9_compute_global_motion_single_block_based(struct VP9_COMP *cpi,
                                                 BLOCK_SIZE bsize,
                                                 double *H);

+int vp9_compute_global_motion_multiple_optical_flow(struct VP9_COMP *cpi,
+                                                    TransformationType type,
+                                                    YV12_BUFFER_CONFIG *frm,
+                                                    YV12_BUFFER_CONFIG *ref,
+                                                    int max_models,
+                                                    double inlier_prob,
+                                                    double *H);
+
+
 // Returns number of models actually returned: 1+ - #models, 0 - if failure
 // max_models is the maximum number of models returned
 // inlier_prob is the probability of being inlier over all the models
--- a/vp9/encoder/vp9_opticalflow.c
+++ b/vp9/encoder/vp9_opticalflow.c
@ -0,0 +1,623 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be
+ *  found  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <time.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <unistd.h>
+#include <math.h>
+#include <stdio.h>
+
+#include "vp9/common/vp9_motion_model.h"
+#include "vp9/encoder/vp9_opticalflow.h"
+#include "vp9/encoder/vp9_ransac.h"
+#include "vp9/encoder/vp9_resize.h"
+
+// ITERATIONS is the number of iterative refinements to make on an initial
+// flow estimate. 3 seems to work well but feel free to play with this.
+#define ITERATIONS 3
+#define MAX_MEDIAN_LENGTH  121
+#define MAX_LEVELS 8
+#define MIN_LEVEL_SIZE 30
+#define MAX_ERROR  0.00001
+#define BLUR_SIZE 11
+#define BLUR_SIGMA 1.5
+#define BLUR_AMP 1
+#define MEDIAN_FILTER_SIZE 5
+#define RELATIVE_ERROR(a, b) (fabs(((a) - (b))))
+#define WRAP_PIXEL(p, size) ((p) < 0 ? abs((p)) - 1 : \
+                            ((p) >= (size) ? (2 * (size) - 1 - (p)) : (p)))
+
+// Struct for an image pyramid
+typedef struct {
+  int n_levels;
+  int widths[MAX_LEVELS];
+  int heights[MAX_LEVELS];
+  int strides[MAX_LEVELS];
+  unsigned char **levels;
+} imPyramid;
+
+typedef struct {
+  int stride;
+  double *confidence;
+  double *u;
+  double *v;
+} flowMV;
+
+static void * safe_malloc(size_t size) {
+  void *v = malloc(size);
+  if (!v) {
+    fprintf(stderr, "Not enough memory\n");
+    exit(EXIT_FAILURE);
+  }
+  return v;
+}
+
+// Produces an image pyramid where each level is resized by a
+// specified resize factor
+static void image_pyramid(unsigned char *img, imPyramid *pyr, int width,
+                          int height, int stride, int n_levels) {
+  int i;
+  int max_levels = 1;
+  int scaled = width < height ? (width/MIN_LEVEL_SIZE) :
+               (height/MIN_LEVEL_SIZE);
+  pyr->widths[0] = width;
+  pyr->heights[0] = height;
+  pyr->strides[0] = stride;
+
+  if (n_levels == 1) {
+    pyr->n_levels = 1;
+    pyr->levels = (unsigned char**)safe_malloc(sizeof(*pyr->levels));
+    pyr->levels[0] = img;
+    return;
+  }
+  // compute number of possible levels of the pyramid. The smallest dim of the
+  // smallest level should be no less than 30 px.
+  while (scaled > 1) {
+    max_levels++;
+    scaled >>= 1;
+  }
+
+  if (n_levels > max_levels)
+    n_levels = max_levels;
+
+  pyr->n_levels = n_levels;
+  pyr->levels = (unsigned char**)safe_malloc(n_levels * sizeof(*pyr->levels));
+  pyr->levels[0] = img;
+
+  // compute each level
+  for (i = 1; i < n_levels; ++i) {
+    pyr->widths[i] = pyr->widths[i-1] >> 1;
+    pyr->heights[i] = pyr->heights[i-1] >> 1;
+    pyr->strides[i] = pyr->widths[i];
+    pyr->levels[i] = (unsigned char*)safe_malloc(pyr->widths[i] *
+                                                 pyr->heights[i] *
+                                                 sizeof(*pyr->levels[i]));
+
+    vp9_resize_plane(pyr->levels[i-1], pyr->heights[i-1], pyr->widths[i-1],
+                     pyr->strides[i-1], pyr->levels[i], pyr->heights[i],
+                     pyr->widths[i], pyr->strides[i]);
+  }
+}
+
+static void destruct_pyramid(imPyramid *pyr) {
+  int i;
+
+  // start at level 1 because level 0 is a reference to the original image
+  for (i = 1; i < pyr->n_levels; ++i)
+    free(pyr->levels[i]);
+  free(pyr->levels);
+}
+
+// Convolution with reflected content padding to minimize edge artifacts.
+static void convolve_char(double *filter, unsigned char *image,
+                          double *convolved, int filt_width, int filt_height,
+                          int image_width, int image_height, int image_stride,
+                          int convolved_stride) {
+  int i, j, x, y, imx, imy;
+  double pixel_val;
+  double filter_val;
+  double image_val;
+  int x_pad_size = filt_width >> 1;
+  int y_pad_size = filt_height >> 1;
+  for (j = 0; j < image_height; ++j)
+    for (i = 0; i < image_width; ++i) {
+      pixel_val = 0;
+      for (y = (-y_pad_size); y<= y_pad_size; y++)
+        for (x = (-x_pad_size); x <= x_pad_size; x++) {
+          filter_val = filter[(x + x_pad_size) + (y + y_pad_size) * filt_width];
+          imx = WRAP_PIXEL(i + x, image_width);
+          imy = WRAP_PIXEL(j + y, image_height);
+          image_val = image[imx + imy * image_stride];
+          pixel_val += (filter_val * image_val);
+        }
+      convolved[i + j * convolved_stride] =  pixel_val;
+    }
+}
+
+// Convolution with reflected content padding to minimize edge artifacts.
+static void convolve_double(double *filter, double *image, double *convolved,
+                            int filt_width, int filt_height, int image_width,
+                            int image_height, int image_stride,
+                            int convolved_stride) {
+  int i, j, x, y, imx, imy;
+  double pixel_val;
+  double filter_val;
+  double image_val;
+  int x_pad_size = filt_width >> 1;
+  int y_pad_size = filt_height >> 1;
+  for (j = 0; j < image_height; ++j)
+    for (i = 0; i < image_width; ++i) {
+      pixel_val = 0;
+      for (y = (-y_pad_size); y<= y_pad_size; y++)
+        for (x = (-x_pad_size); x <= x_pad_size; x++) {
+          filter_val = filter[(x + x_pad_size) + (y + y_pad_size) * filt_width];
+          imx = WRAP_PIXEL(i + x, image_width);
+          imy = WRAP_PIXEL(j + y, image_height);
+          image_val = image[imx + imy * image_stride];
+          pixel_val += (filter_val * image_val);
+        }
+      convolved[i + j * convolved_stride] = pixel_val;
+    }
+}
+
+// computes x and y spatial derivatives of an image.
+static void differentiate(double *img, int width,  int height,
+                          int stride, double *dx_img,  double *dy_img,
+                          int deriv_stride) {
+  const double spatial_deriv_kernel[] = {-0.0833, 0.6667, 0.0,
+                                         -0.6667, 0.0833};
+  convolve_double(spatial_deriv_kernel, img, dx_img, 5, 1, width, height,
+                  stride, deriv_stride);
+  convolve_double(spatial_deriv_kernel, img, dy_img, 1, 5, width, height,
+                  stride, deriv_stride);
+}
+
+// creates a 2D gaussian kernel
+static void gaussian_kernel(double *mat, int size, double sig, double amp) {
+  int x, y;
+  double filter_val;
+  int half;
+  double denominator = 1 / (2.0 * sig * sig);
+  double sum = 0.0f;
+
+  size |= 1;
+  half = size >> 1;
+  for (y = -half; y<= half; y++)
+    for (x = -half; x<= half; x++) {
+      filter_val = amp * exp((double)(-1.0 * ((x * x + y * y) * denominator)));
+      mat[(x + half) + ((y + half) * size)] = filter_val;
+      sum += filter_val;
+    }
+
+  // normalize the filter
+  if (sum > MAX_ERROR) {
+    sum = 1 / sum;
+    for (x = 0; x < size * size; x++) {
+      mat[x] = mat[x] * sum;
+    }
+  }
+}
+
+// blurs image with a gaussian kernel
+static void blur_img(unsigned char *img, int width, int height,
+                     int stride, double *smooth_img, int smooth_stride,
+                     int smoothing_size, double smoothing_sig,
+                     double smoothing_amp) {
+  double *smoothing_kernel = (double *)safe_malloc(smoothing_size *
+                                                   smoothing_size *
+                                                   sizeof(*smoothing_kernel));
+  gaussian_kernel(smoothing_kernel, smoothing_size, smoothing_sig,
+                  smoothing_amp);
+  convolve_char(smoothing_kernel, img, smooth_img, smoothing_size,
+                smoothing_size, width, height, stride, smooth_stride);
+  free(smoothing_kernel);
+}
+
+// Implementation of Hoare's select for linear time median selection.
+// Takes in a pointer to the beginning of a list of values, the length
+// of the list, an integer representing the index of the number to be
+// selected, and an unsigned integer used to seed a random number generator.
+static double selection(double *vals, int length, int k, unsigned int seed) {
+  int pivot_ind, x;
+  double pivot;
+  double L[MAX_MEDIAN_LENGTH], G[MAX_MEDIAN_LENGTH];
+  int l = 0, e = 0, g = 0;
+  pivot_ind = (int) ((length / ((double)RAND_MAX)) * rand_r(&seed));
+  pivot = vals[pivot_ind];
+  for (x = 0; x < length; ++x) {
+    if (RELATIVE_ERROR(vals[x], pivot) <= MAX_ERROR) {
+      e++;
+    } else if (vals[x] < pivot) {
+      L[l] = vals[x];
+      l++;
+    } else if (vals[x] > pivot) {
+      G[g] = vals[x];
+      g++;
+    }
+  }
+
+  if (k <= l)
+    return selection(L, l, k, seed);
+  else if (k <= l + e)
+    return pivot;
+  else
+    return selection(G, g, k - l - e, seed);
+}
+
+// Performs median filtering for denoising. This implementation uses hoare's
+// select to find the median in a block. block_x, block_y are
+// the x and y coordinates of the center of the block in the source.
+static void median_filter(double *source, double *filtered, int block_size,
+                          int width, int height, int source_stride,
+                          int filtered_stride) {
+  int i, j, block_x, block_y, imx, imy;
+  double pivot, val;
+  int length = block_size * block_size;
+  double L[MAX_MEDIAN_LENGTH], G[MAX_MEDIAN_LENGTH];
+  int k = length >> 1 | 1;
+  int l, e, g;
+  int pad_size = block_size >> 1;
+  unsigned int seed = (unsigned int)*source;
+
+  // find the median within each block. Reflected content is used for padding.
+  for (block_y = 0; block_y < height; ++block_y)
+    for (block_x = 0; block_x < width; ++block_x) {
+      l = 0, e = 0, g = 0;
+      memset(L, 0, length * sizeof(*L));
+      memset(G, 0, length * sizeof(*G));
+      pivot = source[block_x + block_y * source_stride];
+      for (j = -pad_size; j <= pad_size; ++j)
+        for (i = -pad_size; i <= pad_size; ++i) {
+          imx = WRAP_PIXEL(i + block_x, width);
+          imy = WRAP_PIXEL(j + block_y, height);
+          val = source[imx + imy * source_stride];
+
+          // pulling out the first iteration of selection so we don't have
+          // to iterate through the block to flatten it and then
+          // iterate through those same values to put them into
+          // the L E G bins in selection. Ugly but more efficent.
+          if (RELATIVE_ERROR(val, pivot) <= MAX_ERROR) {
+            e++;
+          } else if (val < pivot) {
+            L[l] = val;
+            l++;
+          } else if (val > pivot) {
+            G[g] = val;
+            g++;
+          }
+        }
+      if (k <= l)
+        filtered[block_x + block_y * filtered_stride] =
+            selection(L, l, k, seed);
+      else if (k <= l + e)
+        filtered[block_x + block_y * filtered_stride] = pivot;
+      else
+        filtered[block_x + block_y * filtered_stride] = selection(G, g, k -
+                                                                  l - e, seed);
+  }
+}
+
+static inline void pointwise_matrix_sub(double *mat1, double *mat2,
+                                        double *diff, int width, int height,
+                                        int stride1, int stride2,
+                                        int diffstride) {
+  int i, j;
+  for (j = 0; j < height; ++j)
+    for (i = 0; i < width; ++i) {
+      diff[i + j * diffstride] = mat1[i + j * stride1] - mat2[i + j * stride2];
+    }
+}
+
+static inline void pointwise_matrix_add(double *mat1, double *mat2,
+                                        double *sum, int width, int height,
+                                        int stride1, int stride2,
+                                        int sumstride) {
+  int i, j;
+  for (j = 0; j < height; ++j)
+    for (i = 0; i < width; ++i) {
+      sum[i + j * sumstride] = mat1[i + j * stride1] + mat2[i + j * stride2];
+    }
+}
+
+// Solves lucas kanade equation at any give pixel in the image using a local
+// neighborhood for support. loc_x and loc_y are the x and y components
+// of the center of the local neighborhood. Assumes dx, dy and dt have same
+// stride. window is a wind_size x wind_size set of weights for the
+// window_size x window_size neighborhood surrounding loc_x and loc_y.
+static void optical_flow_per_pixel(double *dx, double *dy, double *dt,
+                                   double *window, int wind_size,
+                                   flowMV *flow, int width, int height,
+                                   int stride, int loc_x, int loc_y) {
+  int i, j, iw, jw, im_ind;
+  double g = 0;
+  // M and b are matrices used to solve the equation a = M^-1 * b where a
+  // are the desired optical flow parameters
+  double M[4] = {0, 0, 0, 0};
+  double b[2] = {0, 0};
+  double det = 0;
+  double trace2 = 0;
+  double corner_score = 0;
+  int step = wind_size >> 1;
+  double gdx = 0, gdy = 0;
+
+  for (j = loc_y - step; j <= loc_y + step; ++j)
+    for (i = loc_x - step; i <= loc_x + step; ++i) {
+      // if pixel is out of bounds, use reflected image content
+      iw = WRAP_PIXEL(i, width);
+      jw = WRAP_PIXEL(j, height);
+      im_ind = iw + jw * stride;
+      g = window[(i - loc_x + step) + (j - loc_y + step) * wind_size];
+      gdx = g * dx[im_ind];
+      gdy = g * dy[im_ind];
+      M[0] += gdx * dx[im_ind];
+      M[1] += gdx * dy[im_ind];
+      M[3] += gdy * dy[im_ind];
+      b[0] += -gdx * dt[im_ind];
+      b[1] += -gdy * dt[im_ind];
+    }
+  M[2] = M[1];
+  det = (M[0] * M[3]) - (M[1] * M[1]);
+  trace2 = (M[0] + M[3]) * (M[0] + M[3]);
+  if (RELATIVE_ERROR(det, 0) > MAX_ERROR) {
+    const double det_inv = 1 / det;
+    const double mult_b0 = det_inv * b[0];
+    const double mult_b1 = det_inv * b[1];
+    flow->u[loc_x + loc_y * flow->stride] = M[3] * mult_b0 - M[1] * mult_b1;
+    flow->v[loc_x + loc_y * flow->stride] = -M[2] * mult_b0 + M[0] * mult_b1;
+  } else {
+    if (M[0] == 0 && M[3] == 0) {
+      flow->u[loc_x + loc_y * flow->stride] = 0;
+      flow->v[loc_x + loc_y * flow->stride] = 0;
+    } else {
+      const double trace2_inv = 1 / trace2;
+      const double mult_b0 = trace2_inv * b[0];
+      const double mult_b1 = trace2_inv * b[1];
+      flow->u[loc_x + loc_y * flow->stride] = M[0] * mult_b0 + M[1] * mult_b1;
+      flow->v[loc_x + loc_y * flow->stride] = M[2] * mult_b0 + M[3] * mult_b1;
+    }
+  }
+  // compute inverse harris corner score as confidence metric
+  corner_score = 1 / (det - (0.01 * trace2));
+  flow->confidence[loc_x + loc_y * flow->stride] = corner_score > 2 ?
+                                                   2 : corner_score;
+}
+
+// computes lucas kanade optical flow. Note that this assumes images that
+// already denoised and differentiated.
+static void lucas_kanade_base(double *smooth_frm, double *smooth_ref,
+                              double *dx, double *dy, double *dt, flowMV *flow,
+                              int width, int height, int smooth_frm_stride,
+                              int smooth_ref_stride, int deriv_stride) {
+  int i, j;
+  const int local_neighborhood_sz = 5;
+  // windowing function for local neighborhood weights
+  const double window[] = {0.0039, 0.0156, 0.0234, 0.0156, 0.0039,
+                           0.0156, 0.0625, 0.0938, 0.0625, 0.0156,
+                           0.0234, 0.0938, 0.1406, 0.0938, 0.0234,
+                           0.0156, 0.0625, 0.0938, 0.0625, 0.0156,
+                           0.0039, 0.0156, 0.0234, 0.0156, 0.0039};
+  // compute temporal derivative
+  pointwise_matrix_sub(smooth_ref, smooth_frm, dt, width, height,
+                       smooth_ref_stride, smooth_frm_stride, deriv_stride);
+
+  for (j = 0; j < height; ++j)
+    for (i = 0; i < width; ++i) {
+      optical_flow_per_pixel(dx, dy, dt, window, local_neighborhood_sz,
+                             flow, width, height, deriv_stride, i, j);
+    }
+}
+
+// Improves an initial approximation for the vector field by iteratively
+// warping one image towards the other.
+static void iterative_refinement(unsigned char *ref, double *smooth_frm,
+                                 double *dx, double *dy, double *dt,
+                                 flowMV *flow, int width, int height,
+                                 int ref_stride, int smooth_frm_stride,
+                                 int deriv_stride, int n_refinements) {
+  int i, j, n;
+  double x, y;
+  unsigned char *estimate = (unsigned char*)safe_malloc(width * height *
+                                                        sizeof(*estimate));
+  double *smooth_estimate = (double*)safe_malloc(width * height *
+                                                 sizeof(*smooth_estimate));
+  flowMV new_flow;
+  new_flow.u = (double*)safe_malloc(width * height * sizeof(*new_flow.u));
+  new_flow.v = (double*)safe_malloc(width * height * sizeof(*new_flow.v));
+  new_flow.confidence = (double*)safe_malloc(width * height *
+                                             sizeof(*new_flow.confidence));
+  new_flow.stride = width;
+  // warp one image toward the other
+  for (n = 0; n < n_refinements; ++n) {
+    for (j = 0; j < height; ++j)
+      for (i = 0; i < width; ++i) {
+        x = i - flow->u[i + j * flow->stride];
+        y = j - flow->v[i + j * flow->stride];
+        estimate[i + j * width] = interpolate(ref, x, y, width,
+                                              height, ref_stride);
+      }
+
+    // compute flow between frame and warped estimate
+    blur_img(estimate, width, height, width, smooth_estimate, width, BLUR_SIZE,
+             BLUR_SIGMA, BLUR_AMP);
+    lucas_kanade_base(smooth_frm, smooth_estimate, dx, dy, dt, &new_flow,
+                      width, height, smooth_frm_stride, width, deriv_stride);
+
+    // add residual and apply median filter for denoising
+    pointwise_matrix_add(flow->u, new_flow.u, new_flow.u, width, height,
+                         flow->stride, new_flow.stride, new_flow.stride);
+    pointwise_matrix_add(flow->v, new_flow.v, new_flow.v, width, height,
+                         flow->stride, new_flow.stride, new_flow.stride);
+    median_filter(new_flow.u, flow->u, MEDIAN_FILTER_SIZE, width, height,
+                  new_flow.stride, flow->stride);
+    median_filter(new_flow.v, flow->v, MEDIAN_FILTER_SIZE, width, height,
+                  new_flow.stride, flow->stride);
+    median_filter(new_flow.confidence, flow->confidence, MEDIAN_FILTER_SIZE,
+                  width, height, new_flow.stride, flow->stride);
+  }
+
+  free(smooth_estimate);
+  free(estimate);
+  free(new_flow.u);
+  free(new_flow.v);
+  free(new_flow.confidence);
+}
+
+// interface for computing optical flow.
+void compute_flow(unsigned char *frm, unsigned char *ref,
+                  double *u, double *v, double *confidence,
+                  int width, int height, int frm_stride,
+                  int ref_stride, int n_levels) {
+  double *smooth_ref = (double*)safe_malloc(width * height *
+                                            sizeof(*smooth_ref));
+  double *smooth_frm = (double*)safe_malloc(width * height *
+                                            sizeof(*smooth_frm));
+  double *dx_frm = (double*)safe_malloc(width * height *
+                                        sizeof(*dx_frm));
+  double *dy_frm = (double*)safe_malloc(width * height *
+                                       sizeof(*dx_frm));
+  double *dt = (double *)safe_malloc(width * height * sizeof(*dt));
+  flowMV flow;
+  flow.u = u;
+  flow.v = v;
+  flow.confidence = confidence;
+  flow.stride = width;
+  if (n_levels == 1) {
+    // Uses lucas kanade and iterative estimation without coarse to fine.
+    // This is more efficient for small motion but becomes less accurate
+    // as motion gets larger.
+    blur_img(ref, width, height, ref_stride, smooth_ref, width, BLUR_SIZE,
+             BLUR_SIGMA, BLUR_AMP);
+    blur_img(frm, width, height, frm_stride, smooth_frm, width, BLUR_SIZE,
+             BLUR_SIGMA, BLUR_AMP);
+    differentiate(smooth_frm, width, height, width, dx_frm, dy_frm, width);
+    lucas_kanade_base(smooth_frm, smooth_ref, dx_frm, dy_frm, dt, &flow,
+                      width, height, width, width, width);
+    iterative_refinement(ref, smooth_frm, dx_frm, dy_frm, dt, &flow, width,
+                         height, ref_stride, width, width, ITERATIONS);
+  } else {
+    int i, j, k;
+    // w, h, s_r, s_f are intermediate width, height, ref stride, frame stride
+    // as the resolution changes up the pyramid
+    int w, h, s_r, s_f, w_upscale, h_upscale, s_upscale;
+    double x, y;
+    double *smooth_frm_approx = (double*)safe_malloc(width * height *
+                                             sizeof(*smooth_frm_approx));
+    double *dx_frm_approx = (double*)safe_malloc(width * height *
+                                             sizeof(*dx_frm_approx));
+    double *dy_frm_approx = (double*)safe_malloc(width * height *
+                                             sizeof(*dy_frm_approx));
+    double *u_upscale = (double*)safe_malloc(width * height *
+                                             sizeof(*u_upscale));
+    double *v_upscale = (double*)safe_malloc(width * height *
+                                             sizeof(*v_upscale));
+    unsigned char *frm_approx = (unsigned char*)safe_malloc(width * height *
+                                                           sizeof(*frm_approx));
+    imPyramid *frm_pyramid = (imPyramid*)safe_malloc(sizeof(imPyramid));
+    imPyramid *ref_pyramid = (imPyramid*)safe_malloc(sizeof(imPyramid));
+    // create image pyramids
+    image_pyramid(frm, frm_pyramid, width, height, frm_stride, n_levels);
+    image_pyramid(ref, ref_pyramid, width, height, ref_stride, n_levels);
+    n_levels = frm_pyramid->n_levels;
+    w = frm_pyramid->widths[n_levels - 1];
+    h = frm_pyramid->heights[n_levels - 1];
+    s_r = ref_pyramid->strides[n_levels - 1];
+    s_f = frm_pyramid->strides[n_levels - 1];
+
+    // for each level in the pyramid starting with the coarsest
+    for (i = n_levels - 1; i >= 0; --i) {
+      assert(frm_pyramid->widths[i] == ref_pyramid->widths[i]);
+      assert(frm_pyramid->heights[i] == ref_pyramid->heights[i]);
+      blur_img(ref_pyramid->levels[i], w, h, s_r, smooth_ref, width, BLUR_SIZE,
+               BLUR_SIGMA, BLUR_AMP);
+      blur_img(frm_pyramid->levels[i], w, h, s_f, smooth_frm, width, BLUR_SIZE,
+               BLUR_SIGMA, BLUR_AMP);
+      differentiate(smooth_frm, w, h, width, dx_frm, dy_frm, width);
+
+
+      //  Compute optical flow at this level between the reference frame and
+      //  the estimate produced from warping. If this is the first iteration
+      //  (meaning no warping has happened yet) then we have no approximation
+      //  for the frame and only have to worry about flow between the original
+      //  frame and reference. Every subsequent iteration requires computing
+      //  and estimate for the frame based on previously computed flow vectors.
+
+      if (i < n_levels - 1) {
+        blur_img(frm_approx, w, h, width, smooth_frm_approx, width, BLUR_SIZE,
+                 BLUR_SIGMA, BLUR_AMP);
+        differentiate(smooth_frm_approx, w, h, width, dx_frm_approx,
+                      dy_frm_approx, width);
+        lucas_kanade_base(smooth_frm_approx, smooth_ref, dx_frm_approx,
+                          dy_frm_approx, dt, &flow, w, h, width, width, width);
+        pointwise_matrix_add(flow.u, u_upscale, u_upscale, w, h, flow.stride,
+                             width, width);
+        pointwise_matrix_add(flow.v, v_upscale, v_upscale, w, h, flow.stride,
+                             width, width);
+        median_filter(u_upscale, flow.u, MEDIAN_FILTER_SIZE, w, h, width,
+                      flow.stride);
+        median_filter(v_upscale, flow.v, MEDIAN_FILTER_SIZE, w, h, width,
+                      flow.stride);
+      } else {
+        lucas_kanade_base(smooth_frm, smooth_ref, dx_frm,
+                          dy_frm, dt, &flow, w, h, width, width, width);
+      }
+      iterative_refinement(ref_pyramid->levels[i], smooth_frm, dx_frm, dy_frm,
+                           dt, &flow, w, h, s_r, width, width, ITERATIONS);
+
+
+      // if we're at the finest level, we're ready to return u and v
+      if (i == 0) {
+        assert(w == width);
+        assert(h == height);
+        destruct_pyramid(frm_pyramid);
+        destruct_pyramid(ref_pyramid);
+        free(frm_pyramid);
+        free(ref_pyramid);
+        free(frm_approx);
+        free(smooth_frm_approx);
+        free(dx_frm_approx);
+        free(dy_frm_approx);
+        free(u_upscale);
+        free(v_upscale);
+        break;
+      }
+
+      w_upscale = ref_pyramid->widths[i - 1];
+      h_upscale = ref_pyramid->heights[i - 1];
+      s_upscale = ref_pyramid->strides[i - 1];
+
+      // warp image according to optical flow estimate
+      for (j = 0; j < h_upscale; ++j)
+        for (k = 0; k < w_upscale; ++k) {
+          u_upscale[k + j * w_upscale] = flow.u[(int)(k >> 1) +
+                                                 (int)(j >> 1) * flow.stride];
+          v_upscale[k + j * w_upscale] = flow.v[(int)(k >> 1) +
+                                                 (int)(j >> 1) * flow.stride];
+          x = k - u_upscale[k + j * w_upscale];
+          y = j - v_upscale[k + j * w_upscale];
+          frm_approx[k + j * width] = interpolate(ref_pyramid->levels[i - 1],
+                                                  x, y, w_upscale, h_upscale,
+                                                  s_upscale);
+        }
+
+      // assign dimensions for next level
+      w = frm_pyramid->widths[i - 1];
+      h = frm_pyramid->heights[i - 1];
+      s_r = ref_pyramid->strides[i - 1];
+      s_f = frm_pyramid->strides[i - 1];
+    }
+  }
+  free(smooth_ref);
+  free(smooth_frm);
+  free(dx_frm);
+  free(dy_frm);
+  free(dt);
+}
--- a/vp9/encoder/vp9_opticalflow.h
+++ b/vp9/encoder/vp9_opticalflow.h
@ -0,0 +1,28 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be
+ *  found  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+  Top level interface for computing optical flow. This function
+  takes in two images and returns the pixelwise motion field that
+  describes the motion between the two images. n_levels corresponds
+  to the number of levels of an image pyramid to use for coarse to fine
+  refinement. If no coarse to fine refinement is desired, this should be
+  set to 1.
+
+  Note that the warping needs to be done by SUBTRACTING the flow vectors
+  from the locations in the second image passed in (ref) in order to produce
+  a prediction for the first image passed in (frm).
+
+  To compute the motion vectors for a single pixel, see the function
+  optical_flow_per_pixel in vp9_opticalflow.c.
+*/
+void compute_flow(unsigned char *frm, unsigned char *ref,
+                  double *u, double *v, double *confidence, int width,
+                  int height, int frm_stride, int ref_stride, int n_levels);
--- a/vp9/encoder/vp9_ransac.c
+++ b/vp9/encoder/vp9_ransac.c
@ -46,16 +46,16 @@ static inline double PYTHAG(double a, double b) {
  }
 }

-inline int IMIN(int a, int b) {
+int IMIN(int a, int b) {
  return (((a) < (b)) ? (a) : (b));
 }

-inline int IMAX(int a, int b) {
+int IMAX(int a, int b) {
  return (((a) < (b)) ? (b) : (a));
 }

-static void MultiplyMat(double *m1, double *m2, double *res,
-                        const int M1, const int N1, const int N2) {
+void MultiplyMat(double *m1, double *m2, double *res,
+                 const int M1, const int N1, const int N2) {
  int timesInner = N1;
  int timesRows = M1;
  int timesCols = N2;
--- a/vp9/encoder/vp9_ransac.h
+++ b/vp9/encoder/vp9_ransac.h
@ -35,4 +35,8 @@ int ransacTranslation(double *matched_points, int npoints,
                      int *number_of_inliers, int *best_inlier_indices,
                      double *bestH);

+void MultiplyMat(double *m1, double *m2, double *res,
+                 const int M1, const int N1, const int N2);
+int PseudoInverse(double *inv, double *matx, const int M, const int N);
+
 #endif  // VP9_ENCODER_VP9_RANSAC_H
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@ -81,6 +81,8 @@ VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_global_motion.c
 VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_global_motion.h
 VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_motion_field.c
 VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_motion_field.h
+VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_opticalflow.c
+VP9_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/vp9_opticalflow.h
 VP9_CX_SRCS-$(CONFIG_WAVELETS) += encoder/vp9_dwt.c
 VP9_CX_SRCS-$(CONFIG_WAVELETS) += encoder/vp9_dwt.h
 VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.c