Merge "vpx_scale: sync from experimental"

2012-11-02 09:16:41 -07:00 · 2012-11-02 09:16:41 -07:00 · 3b783d2217
commit 3b783d2217
parent 8a6a7ae292 9e06601db6
12 changed files with 2640 additions and 2822 deletions
--- a/vpx_scale/arm/neon/yv12extend_arm.c
+++ b/vpx_scale/arm/neon/yv12extend_arm.c
@@ -8,15 +8,14 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#include "vpx_rtcd.h"
+#include "./vpx_rtcd.h"

 extern void vp8_yv12_copy_frame_func_neon(struct yv12_buffer_config *src_ybc,
                                          struct yv12_buffer_config *dst_ybc);

 void vp8_yv12_copy_frame_neon(struct yv12_buffer_config *src_ybc,
-                              struct yv12_buffer_config *dst_ybc)
-{
-    vp8_yv12_copy_frame_func_neon(src_ybc, dst_ybc);
+                              struct yv12_buffer_config *dst_ybc) {
+  vp8_yv12_copy_frame_func_neon(src_ybc, dst_ybc);

-    vp8_yv12_extend_frame_borders_neon(dst_ybc);
+  vp8_yv12_extend_frame_borders_neon(dst_ybc);
 }
--- a/vpx_scale/generic/bicubic_scaler.c
+++ b/vpx_scale/generic/bicubic_scaler.c
@@ -46,557 +46,524 @@ static float a = -0.6;
 //         3     2
 // C0 = a*t - a*t
 //
-static short c0_fixed(unsigned int t)
-{
-    // put t in Q16 notation
-    unsigned short v1, v2;
+static short c0_fixed(unsigned int t) {
+  // put t in Q16 notation
+  unsigned short v1, v2;

-    // Q16
-    v1 = (a_i * t) >> 16;
-    v1 = (v1 * t) >> 16;
+  // Q16
+  v1 = (a_i * t) >> 16;
+  v1 = (v1 * t) >> 16;

-    // Q16
-    v2 = (a_i * t) >> 16;
-    v2 = (v2 * t) >> 16;
-    v2 = (v2 * t) >> 16;
+  // Q16
+  v2 = (a_i * t) >> 16;
+  v2 = (v2 * t) >> 16;
+  v2 = (v2 * t) >> 16;

-    // Q12
-    return -((v1 - v2) >> 4);
+  // Q12
+  return -((v1 - v2) >> 4);
 }

 //                     2          3
 // C1 = a*t + (3-2*a)*t  - (2-a)*t
 //
-static short c1_fixed(unsigned int t)
-{
-    unsigned short v1, v2, v3;
-    unsigned short two, three;
+static short c1_fixed(unsigned int t) {
+  unsigned short v1, v2, v3;
+  unsigned short two, three;

-    // Q16
-    v1 = (a_i * t) >> 16;
+  // Q16
+  v1 = (a_i * t) >> 16;

-    // Q13
-    two = 2 << 13;
-    v2 = two - (a_i >> 3);
-    v2 = (v2 * t) >> 16;
-    v2 = (v2 * t) >> 16;
-    v2 = (v2 * t) >> 16;
+  // Q13
+  two = 2 << 13;
+  v2 = two - (a_i >> 3);
+  v2 = (v2 * t) >> 16;
+  v2 = (v2 * t) >> 16;
+  v2 = (v2 * t) >> 16;

-    // Q13
-    three = 3 << 13;
-    v3 = three - (2 * (a_i >> 3));
-    v3 = (v3 * t) >> 16;
-    v3 = (v3 * t) >> 16;
+  // Q13
+  three = 3 << 13;
+  v3 = three - (2 * (a_i >> 3));
+  v3 = (v3 * t) >> 16;
+  v3 = (v3 * t) >> 16;

-    // Q12
-    return (((v1 >> 3) - v2 + v3) >> 1);
+  // Q12
+  return (((v1 >> 3) - v2 + v3) >> 1);

 }

 //                 2          3
 // C2 = 1 - (3-a)*t  + (2-a)*t
 //
-static short c2_fixed(unsigned int t)
-{
-    unsigned short v1, v2, v3;
-    unsigned short two, three;
+static short c2_fixed(unsigned int t) {
+  unsigned short v1, v2, v3;
+  unsigned short two, three;

-    // Q13
-    v1 = 1 << 13;
+  // Q13
+  v1 = 1 << 13;

-    // Q13
-    three = 3 << 13;
-    v2 = three - (a_i >> 3);
-    v2 = (v2 * t) >> 16;
-    v2 = (v2 * t) >> 16;
+  // Q13
+  three = 3 << 13;
+  v2 = three - (a_i >> 3);
+  v2 = (v2 * t) >> 16;
+  v2 = (v2 * t) >> 16;

-    // Q13
-    two = 2 << 13;
-    v3 = two - (a_i >> 3);
-    v3 = (v3 * t) >> 16;
-    v3 = (v3 * t) >> 16;
-    v3 = (v3 * t) >> 16;
+  // Q13
+  two = 2 << 13;
+  v3 = two - (a_i >> 3);
+  v3 = (v3 * t) >> 16;
+  v3 = (v3 * t) >> 16;
+  v3 = (v3 * t) >> 16;

-    // Q12
-    return (v1 - v2 + v3) >> 1;
+  // Q12
+  return (v1 - v2 + v3) >> 1;
 }

 //                 2      3
 // C3 = a*t - 2*a*t  + a*t
 //
-static short c3_fixed(unsigned int t)
-{
-    int v1, v2, v3;
+static short c3_fixed(unsigned int t) {
+  int v1, v2, v3;

-    // Q16
-    v1 = (a_i * t) >> 16;
+  // Q16
+  v1 = (a_i * t) >> 16;

-    // Q15
-    v2 = 2 * (a_i >> 1);
-    v2 = (v2 * t) >> 16;
-    v2 = (v2 * t) >> 16;
+  // Q15
+  v2 = 2 * (a_i >> 1);
+  v2 = (v2 * t) >> 16;
+  v2 = (v2 * t) >> 16;

-    // Q16
-    v3 = (a_i * t) >> 16;
-    v3 = (v3 * t) >> 16;
-    v3 = (v3 * t) >> 16;
+  // Q16
+  v3 = (a_i * t) >> 16;
+  v3 = (v3 * t) >> 16;
+  v3 = (v3 * t) >> 16;

-    // Q12
-    return ((v2 - (v1 >> 1) - (v3 >> 1)) >> 3);
+  // Q12
+  return ((v2 - (v1 >> 1) - (v3 >> 1)) >> 3);
 }
 #else
 //          3     2
 // C0 = -a*t + a*t
 //
-float C0(float t)
-{
-    return -a * t * t * t + a * t * t;
+float C0(float t) {
+  return -a * t * t * t + a * t * t;
 }

 //                      2          3
 // C1 = -a*t + (2*a+3)*t  - (a+2)*t
 //
-float C1(float t)
-{
-    return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t;
+float C1(float t) {
+  return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t;
 }

 //                 2          3
 // C2 = 1 - (a+3)*t  + (a+2)*t
 //
-float C2(float t)
-{
-    return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f;
+float C2(float t) {
+  return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f;
 }

 //                 2      3
 // C3 = a*t - 2*a*t  + a*t
 //
-float C3(float t)
-{
-    return a * t * t * t - 2.0f * a * t * t + a * t;
+float C3(float t) {
+  return a * t * t * t - 2.0f * a * t * t + a * t;
 }
 #endif

 #if 0
-int compare_real_fixed()
-{
-    int i, errors = 0;
-    float mult = 1.0 / 10000.0;
-    unsigned int fixed_mult = mult * 4294967296;//65536;
-    unsigned int phase_offset_int;
-    float phase_offset_real;
+int compare_real_fixed() {
+  int i, errors = 0;
+  float mult = 1.0 / 10000.0;
+  unsigned int fixed_mult = mult * 4294967296;// 65536;
+  unsigned int phase_offset_int;
+  float phase_offset_real;

-    for (i = 0; i < 10000; i++)
-    {
-        int fixed0, fixed1, fixed2, fixed3, fixed_total;
-        int real0, real1, real2, real3, real_total;
+  for (i = 0; i < 10000; i++) {
+    int fixed0, fixed1, fixed2, fixed3, fixed_total;
+    int real0, real1, real2, real3, real_total;

-        phase_offset_real = (float)i * mult;
-        phase_offset_int = (fixed_mult * i) >> 16;
+    phase_offset_real = (float)i * mult;
+    phase_offset_int = (fixed_mult * i) >> 16;
 //      phase_offset_int = phase_offset_real * 65536;

-        fixed0 = c0_fixed(phase_offset_int);
-        real0 = C0(phase_offset_real) * 4096.0;
+    fixed0 = c0_fixed(phase_offset_int);
+    real0 = C0(phase_offset_real) * 4096.0;

-        if ((abs(fixed0) > (abs(real0) + 1)) || (abs(fixed0) < (abs(real0) - 1)))
-            errors++;
+    if ((abs(fixed0) > (abs(real0) + 1)) || (abs(fixed0) < (abs(real0) - 1)))
+      errors++;

-        fixed1 = c1_fixed(phase_offset_int);
-        real1 = C1(phase_offset_real) * 4096.0;
+    fixed1 = c1_fixed(phase_offset_int);
+    real1 = C1(phase_offset_real) * 4096.0;

-        if ((abs(fixed1) > (abs(real1) + 1)) || (abs(fixed1) < (abs(real1) - 1)))
-            errors++;
+    if ((abs(fixed1) > (abs(real1) + 1)) || (abs(fixed1) < (abs(real1) - 1)))
+      errors++;

-        fixed2 = c2_fixed(phase_offset_int);
-        real2 = C2(phase_offset_real) * 4096.0;
+    fixed2 = c2_fixed(phase_offset_int);
+    real2 = C2(phase_offset_real) * 4096.0;

-        if ((abs(fixed2) > (abs(real2) + 1)) || (abs(fixed2) < (abs(real2) - 1)))
-            errors++;
+    if ((abs(fixed2) > (abs(real2) + 1)) || (abs(fixed2) < (abs(real2) - 1)))
+      errors++;

-        fixed3 = c3_fixed(phase_offset_int);
-        real3 = C3(phase_offset_real) * 4096.0;
+    fixed3 = c3_fixed(phase_offset_int);
+    real3 = C3(phase_offset_real) * 4096.0;

-        if ((abs(fixed3) > (abs(real3) + 1)) || (abs(fixed3) < (abs(real3) - 1)))
-            errors++;
+    if ((abs(fixed3) > (abs(real3) + 1)) || (abs(fixed3) < (abs(real3) - 1)))
+      errors++;

-        fixed_total = fixed0 + fixed1 + fixed2 + fixed3;
-        real_total = real0 + real1 + real2 + real3;
+    fixed_total = fixed0 + fixed1 + fixed2 + fixed3;
+    real_total = real0 + real1 + real2 + real3;

-        if ((fixed_total > 4097) || (fixed_total < 4094))
-            errors ++;
+    if ((fixed_total > 4097) || (fixed_total < 4094))
+      errors++;

-        if ((real_total > 4097) || (real_total < 4095))
-            errors ++;
-    }
+    if ((real_total > 4097) || (real_total < 4095))
+      errors++;
+  }

-    return errors;
+  return errors;
 }
 #endif

 // Find greatest common denominator between two integers.  Method used here is
 //  slow compared to Euclid's algorithm, but does not require any division.
-int gcd(int a, int b)
-{
-    // Problem with this algorithm is that if a or b = 0 this function
-    //  will never exit.  Don't want to return 0 because any computation
-    //  that was based on a common denoninator and tried to reduce by
-    //  dividing by 0 would fail.  Best solution that could be thought of
-    //  would to be fail by returing a 1;
-    if (a <= 0 || b <= 0)
-        return 1;
+int gcd(int a, int b) {
+  // Problem with this algorithm is that if a or b = 0 this function
+  //  will never exit.  Don't want to return 0 because any computation
+  //  that was based on a common denoninator and tried to reduce by
+  //  dividing by 0 would fail.  Best solution that could be thought of
+  //  would to be fail by returing a 1;
+  if (a <= 0 || b <= 0)
+    return 1;

-    while (a != b)
-    {
-        if (b > a)
-            b = b - a;
-        else
-        {
-            int tmp = a;//swap large and
-            a = b; //small
-            b = tmp;
-        }
+  while (a != b) {
+    if (b > a)
+      b = b - a;
+    else {
+      int tmp = a;// swap large and
+      a = b; // small
+      b = tmp;
    }
+  }

-    return b;
+  return b;
 }

-void bicubic_coefficient_init()
-{
-    vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT));
-    g_first_time = 0;
+void bicubic_coefficient_init() {
+  vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT));
+  g_first_time = 0;
 }

-void bicubic_coefficient_destroy()
-{
-    if (!g_first_time)
-    {
-        vpx_free(g_b_scaler.l_w);
-
-        vpx_free(g_b_scaler.l_h);
-
-        vpx_free(g_b_scaler.l_h_uv);
-
-        vpx_free(g_b_scaler.c_w);
-
-        vpx_free(g_b_scaler.c_h);
-
-        vpx_free(g_b_scaler.c_h_uv);
-
-        vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT));
-    }
-}
-
-// Create the coeffients that will be used for the cubic interpolation.
-//  Because scaling does not have to be equal in the vertical and horizontal
-//  regimes the phase offsets will be different.  There are 4 coefficents
-//  for each point, two on each side.  The layout is that there are the
-//  4 coefficents for each phase in the array and then the next phase.
-int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height)
-{
-    int i;
-#ifdef FIXED_POINT
-    int phase_offset_int;
-    unsigned int fixed_mult;
-    int product_val = 0;
-#else
-    float phase_offset;
-#endif
-    int gcd_w, gcd_h, gcd_h_uv, d_w, d_h, d_h_uv;
-
-    if (g_first_time)
-        bicubic_coefficient_init();
-
-
-    // check to see if the coefficents have already been set up correctly
-    if ((in_width == g_b_scaler.in_width) && (in_height == g_b_scaler.in_height)
-        && (out_width == g_b_scaler.out_width) && (out_height == g_b_scaler.out_height))
-        return 0;
-
-    g_b_scaler.in_width = in_width;
-    g_b_scaler.in_height = in_height;
-    g_b_scaler.out_width = out_width;
-    g_b_scaler.out_height = out_height;
-
-    // Don't want to allow crazy scaling, just try and prevent a catastrophic
-    //  failure here.  Want to fail after setting the member functions so if
-    //  if the scaler is called the member functions will not scale.
-    if (out_width <= 0 || out_height <= 0)
-        return -1;
-
-    // reduce in/out width and height ratios using the gcd
-    gcd_w = gcd(out_width, in_width);
-    gcd_h = gcd(out_height, in_height);
-    gcd_h_uv = gcd(out_height, in_height / 2);
-
-    // the numerator width and height are to be saved in
-    //  globals so they can be used during the scaling process
-    //  without having to be recalculated.
-    g_b_scaler.nw = out_width / gcd_w;
-    d_w = in_width / gcd_w;
-
-    g_b_scaler.nh = out_height / gcd_h;
-    d_h = in_height / gcd_h;
-
-    g_b_scaler.nh_uv = out_height / gcd_h_uv;
-    d_h_uv = (in_height / 2) / gcd_h_uv;
-
-    // allocate memory for the coefficents
+void bicubic_coefficient_destroy() {
+  if (!g_first_time) {
    vpx_free(g_b_scaler.l_w);

    vpx_free(g_b_scaler.l_h);

    vpx_free(g_b_scaler.l_h_uv);

-    g_b_scaler.l_w = (short *)vpx_memalign(32, out_width * 2);
-    g_b_scaler.l_h = (short *)vpx_memalign(32, out_height * 2);
-    g_b_scaler.l_h_uv = (short *)vpx_memalign(32, out_height * 2);
-
    vpx_free(g_b_scaler.c_w);

    vpx_free(g_b_scaler.c_h);

    vpx_free(g_b_scaler.c_h_uv);

-    g_b_scaler.c_w = (short *)vpx_memalign(32, g_b_scaler.nw * 4 * 2);
-    g_b_scaler.c_h = (short *)vpx_memalign(32, g_b_scaler.nh * 4 * 2);
-    g_b_scaler.c_h_uv = (short *)vpx_memalign(32, g_b_scaler.nh_uv * 4 * 2);
+    vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT));
+  }
+}

-    g_b_scaler.hbuf = g_hbuf;
-    g_b_scaler.hbuf_uv = g_hbuf_uv;
-
-    // Set up polyphase filter taps.  This needs to be done before
-    //  the scaling because of the floating point math required.  The
-    //  coefficients are multiplied by 2^12 so that fixed point math
-    //  can be used in the main scaling loop.
+// Create the coeffients that will be used for the cubic interpolation.
+//  Because scaling does not have to be equal in the vertical and horizontal
+//  regimes the phase offsets will be different.  There are 4 coefficents
+//  for each point, two on each side.  The layout is that there are the
+//  4 coefficents for each phase in the array and then the next phase.
+int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height) {
+  int i;
 #ifdef FIXED_POINT
-    fixed_mult = (1.0 / (float)g_b_scaler.nw) * 4294967296;
+  int phase_offset_int;
+  unsigned int fixed_mult;
+  int product_val = 0;
+#else
+  float phase_offset;
+#endif
+  int gcd_w, gcd_h, gcd_h_uv, d_w, d_h, d_h_uv;

-    product_val = 0;
-
-    for (i = 0; i < g_b_scaler.nw; i++)
-    {
-        if (product_val > g_b_scaler.nw)
-            product_val -= g_b_scaler.nw;
-
-        phase_offset_int = (fixed_mult * product_val) >> 16;
-
-        g_b_scaler.c_w[i*4]   = c3_fixed(phase_offset_int);
-        g_b_scaler.c_w[i*4+1] = c2_fixed(phase_offset_int);
-        g_b_scaler.c_w[i*4+2] = c1_fixed(phase_offset_int);
-        g_b_scaler.c_w[i*4+3] = c0_fixed(phase_offset_int);
-
-        product_val += d_w;
-    }
+  if (g_first_time)
+    bicubic_coefficient_init();


-    fixed_mult = (1.0 / (float)g_b_scaler.nh) * 4294967296;
+  // check to see if the coefficents have already been set up correctly
+  if ((in_width == g_b_scaler.in_width) && (in_height == g_b_scaler.in_height)
+      && (out_width == g_b_scaler.out_width) && (out_height == g_b_scaler.out_height))
+    return 0;

-    product_val = 0;
+  g_b_scaler.in_width = in_width;
+  g_b_scaler.in_height = in_height;
+  g_b_scaler.out_width = out_width;
+  g_b_scaler.out_height = out_height;

-    for (i = 0; i < g_b_scaler.nh; i++)
-    {
-        if (product_val > g_b_scaler.nh)
-            product_val -= g_b_scaler.nh;
+  // Don't want to allow crazy scaling, just try and prevent a catastrophic
+  //  failure here.  Want to fail after setting the member functions so if
+  //  if the scaler is called the member functions will not scale.
+  if (out_width <= 0 || out_height <= 0)
+    return -1;

-        phase_offset_int = (fixed_mult * product_val) >> 16;
+  // reduce in/out width and height ratios using the gcd
+  gcd_w = gcd(out_width, in_width);
+  gcd_h = gcd(out_height, in_height);
+  gcd_h_uv = gcd(out_height, in_height / 2);

-        g_b_scaler.c_h[i*4]   = c0_fixed(phase_offset_int);
-        g_b_scaler.c_h[i*4+1] = c1_fixed(phase_offset_int);
-        g_b_scaler.c_h[i*4+2] = c2_fixed(phase_offset_int);
-        g_b_scaler.c_h[i*4+3] = c3_fixed(phase_offset_int);
+  // the numerator width and height are to be saved in
+  //  globals so they can be used during the scaling process
+  //  without having to be recalculated.
+  g_b_scaler.nw = out_width / gcd_w;
+  d_w = in_width / gcd_w;

-        product_val += d_h;
-    }
+  g_b_scaler.nh = out_height / gcd_h;
+  d_h = in_height / gcd_h;

-    fixed_mult = (1.0 / (float)g_b_scaler.nh_uv) * 4294967296;
+  g_b_scaler.nh_uv = out_height / gcd_h_uv;
+  d_h_uv = (in_height / 2) / gcd_h_uv;

-    product_val = 0;
+  // allocate memory for the coefficents
+  vpx_free(g_b_scaler.l_w);

-    for (i = 0; i < g_b_scaler.nh_uv; i++)
-    {
-        if (product_val > g_b_scaler.nh_uv)
-            product_val -= g_b_scaler.nh_uv;
+  vpx_free(g_b_scaler.l_h);

-        phase_offset_int = (fixed_mult * product_val) >> 16;
+  vpx_free(g_b_scaler.l_h_uv);

-        g_b_scaler.c_h_uv[i*4]   = c0_fixed(phase_offset_int);
-        g_b_scaler.c_h_uv[i*4+1] = c1_fixed(phase_offset_int);
-        g_b_scaler.c_h_uv[i*4+2] = c2_fixed(phase_offset_int);
-        g_b_scaler.c_h_uv[i*4+3] = c3_fixed(phase_offset_int);
+  g_b_scaler.l_w = (short *)vpx_memalign(32, out_width * 2);
+  g_b_scaler.l_h = (short *)vpx_memalign(32, out_height * 2);
+  g_b_scaler.l_h_uv = (short *)vpx_memalign(32, out_height * 2);

-        product_val += d_h_uv;
-    }
+  vpx_free(g_b_scaler.c_w);
+
+  vpx_free(g_b_scaler.c_h);
+
+  vpx_free(g_b_scaler.c_h_uv);
+
+  g_b_scaler.c_w = (short *)vpx_memalign(32, g_b_scaler.nw * 4 * 2);
+  g_b_scaler.c_h = (short *)vpx_memalign(32, g_b_scaler.nh * 4 * 2);
+  g_b_scaler.c_h_uv = (short *)vpx_memalign(32, g_b_scaler.nh_uv * 4 * 2);
+
+  g_b_scaler.hbuf = g_hbuf;
+  g_b_scaler.hbuf_uv = g_hbuf_uv;
+
+  // Set up polyphase filter taps.  This needs to be done before
+  //  the scaling because of the floating point math required.  The
+  //  coefficients are multiplied by 2^12 so that fixed point math
+  //  can be used in the main scaling loop.
+#ifdef FIXED_POINT
+  fixed_mult = (1.0 / (float)g_b_scaler.nw) * 4294967296;
+
+  product_val = 0;
+
+  for (i = 0; i < g_b_scaler.nw; i++) {
+    if (product_val > g_b_scaler.nw)
+      product_val -= g_b_scaler.nw;
+
+    phase_offset_int = (fixed_mult * product_val) >> 16;
+
+    g_b_scaler.c_w[i * 4]   = c3_fixed(phase_offset_int);
+    g_b_scaler.c_w[i * 4 + 1] = c2_fixed(phase_offset_int);
+    g_b_scaler.c_w[i * 4 + 2] = c1_fixed(phase_offset_int);
+    g_b_scaler.c_w[i * 4 + 3] = c0_fixed(phase_offset_int);
+
+    product_val += d_w;
+  }
+
+
+  fixed_mult = (1.0 / (float)g_b_scaler.nh) * 4294967296;
+
+  product_val = 0;
+
+  for (i = 0; i < g_b_scaler.nh; i++) {
+    if (product_val > g_b_scaler.nh)
+      product_val -= g_b_scaler.nh;
+
+    phase_offset_int = (fixed_mult * product_val) >> 16;
+
+    g_b_scaler.c_h[i * 4]   = c0_fixed(phase_offset_int);
+    g_b_scaler.c_h[i * 4 + 1] = c1_fixed(phase_offset_int);
+    g_b_scaler.c_h[i * 4 + 2] = c2_fixed(phase_offset_int);
+    g_b_scaler.c_h[i * 4 + 3] = c3_fixed(phase_offset_int);
+
+    product_val += d_h;
+  }
+
+  fixed_mult = (1.0 / (float)g_b_scaler.nh_uv) * 4294967296;
+
+  product_val = 0;
+
+  for (i = 0; i < g_b_scaler.nh_uv; i++) {
+    if (product_val > g_b_scaler.nh_uv)
+      product_val -= g_b_scaler.nh_uv;
+
+    phase_offset_int = (fixed_mult * product_val) >> 16;
+
+    g_b_scaler.c_h_uv[i * 4]   = c0_fixed(phase_offset_int);
+    g_b_scaler.c_h_uv[i * 4 + 1] = c1_fixed(phase_offset_int);
+    g_b_scaler.c_h_uv[i * 4 + 2] = c2_fixed(phase_offset_int);
+    g_b_scaler.c_h_uv[i * 4 + 3] = c3_fixed(phase_offset_int);
+
+    product_val += d_h_uv;
+  }

 #else

-    for (i = 0; i < g_nw; i++)
-    {
-        phase_offset = (float)((i * d_w) % g_nw) / (float)g_nw;
-        g_c_w[i*4]   = (C3(phase_offset) * 4096.0);
-        g_c_w[i*4+1] = (C2(phase_offset) * 4096.0);
-        g_c_w[i*4+2] = (C1(phase_offset) * 4096.0);
-        g_c_w[i*4+3] = (C0(phase_offset) * 4096.0);
-    }
+  for (i = 0; i < g_nw; i++) {
+    phase_offset = (float)((i * d_w) % g_nw) / (float)g_nw;
+    g_c_w[i * 4]   = (C3(phase_offset) * 4096.0);
+    g_c_w[i * 4 + 1] = (C2(phase_offset) * 4096.0);
+    g_c_w[i * 4 + 2] = (C1(phase_offset) * 4096.0);
+    g_c_w[i * 4 + 3] = (C0(phase_offset) * 4096.0);
+  }

-    for (i = 0; i < g_nh; i++)
-    {
-        phase_offset = (float)((i * d_h) % g_nh) / (float)g_nh;
-        g_c_h[i*4]   = (C0(phase_offset) * 4096.0);
-        g_c_h[i*4+1] = (C1(phase_offset) * 4096.0);
-        g_c_h[i*4+2] = (C2(phase_offset) * 4096.0);
-        g_c_h[i*4+3] = (C3(phase_offset) * 4096.0);
-    }
+  for (i = 0; i < g_nh; i++) {
+    phase_offset = (float)((i * d_h) % g_nh) / (float)g_nh;
+    g_c_h[i * 4]   = (C0(phase_offset) * 4096.0);
+    g_c_h[i * 4 + 1] = (C1(phase_offset) * 4096.0);
+    g_c_h[i * 4 + 2] = (C2(phase_offset) * 4096.0);
+    g_c_h[i * 4 + 3] = (C3(phase_offset) * 4096.0);
+  }

-    for (i = 0; i < g_nh_uv; i++)
-    {
-        phase_offset = (float)((i * d_h_uv) % g_nh_uv) / (float)g_nh_uv;
-        g_c_h_uv[i*4]   = (C0(phase_offset) * 4096.0);
-        g_c_h_uv[i*4+1] = (C1(phase_offset) * 4096.0);
-        g_c_h_uv[i*4+2] = (C2(phase_offset) * 4096.0);
-        g_c_h_uv[i*4+3] = (C3(phase_offset) * 4096.0);
-    }
+  for (i = 0; i < g_nh_uv; i++) {
+    phase_offset = (float)((i * d_h_uv) % g_nh_uv) / (float)g_nh_uv;
+    g_c_h_uv[i * 4]   = (C0(phase_offset) * 4096.0);
+    g_c_h_uv[i * 4 + 1] = (C1(phase_offset) * 4096.0);
+    g_c_h_uv[i * 4 + 2] = (C2(phase_offset) * 4096.0);
+    g_c_h_uv[i * 4 + 3] = (C3(phase_offset) * 4096.0);
+  }

 #endif

-    // Create an array that corresponds input lines to output lines.
-    //  This doesn't require floating point math, but it does require
-    //  a division and because hardware division is not present that
-    //  is a call.
-    for (i = 0; i < out_width; i++)
-    {
-        g_b_scaler.l_w[i] = (i * d_w) / g_b_scaler.nw;
+  // Create an array that corresponds input lines to output lines.
+  //  This doesn't require floating point math, but it does require
+  //  a division and because hardware division is not present that
+  //  is a call.
+  for (i = 0; i < out_width; i++) {
+    g_b_scaler.l_w[i] = (i * d_w) / g_b_scaler.nw;

-        if ((g_b_scaler.l_w[i] + 2) <= in_width)
-            g_b_scaler.max_usable_out_width = i;
+    if ((g_b_scaler.l_w[i] + 2) <= in_width)
+      g_b_scaler.max_usable_out_width = i;

-    }
+  }

-    for (i = 0; i < out_height + 1; i++)
-    {
-        g_b_scaler.l_h[i] = (i * d_h) / g_b_scaler.nh;
-        g_b_scaler.l_h_uv[i] = (i * d_h_uv) / g_b_scaler.nh_uv;
-    }
+  for (i = 0; i < out_height + 1; i++) {
+    g_b_scaler.l_h[i] = (i * d_h) / g_b_scaler.nh;
+    g_b_scaler.l_h_uv[i] = (i * d_h_uv) / g_b_scaler.nh_uv;
+  }

-    return 0;
+  return 0;
 }

 int bicubic_scale(int in_width, int in_height, int in_stride,
                  int out_width, int out_height, int out_stride,
-                  unsigned char *input_image, unsigned char *output_image)
-{
-    short *RESTRICT l_w, * RESTRICT l_h;
-    short *RESTRICT c_w, * RESTRICT c_h;
-    unsigned char *RESTRICT ip, * RESTRICT op;
-    unsigned char *RESTRICT hbuf;
-    int h, w, lw, lh;
-    int temp_sum;
-    int phase_offset_w, phase_offset_h;
+                  unsigned char *input_image, unsigned char *output_image) {
+  short *RESTRICT l_w, * RESTRICT l_h;
+  short *RESTRICT c_w, * RESTRICT c_h;
+  unsigned char *RESTRICT ip, * RESTRICT op;
+  unsigned char *RESTRICT hbuf;
+  int h, w, lw, lh;
+  int temp_sum;
+  int phase_offset_w, phase_offset_h;

-    c_w = g_b_scaler.c_w;
-    c_h = g_b_scaler.c_h;
+  c_w = g_b_scaler.c_w;
+  c_h = g_b_scaler.c_h;

-    op = output_image;
+  op = output_image;

-    l_w = g_b_scaler.l_w;
-    l_h = g_b_scaler.l_h;
+  l_w = g_b_scaler.l_w;
+  l_h = g_b_scaler.l_h;

-    phase_offset_h = 0;
+  phase_offset_h = 0;

-    for (h = 0; h < out_height; h++)
-    {
-        // select the row to work on
-        lh = l_h[h];
-        ip = input_image + (in_stride * lh);
+  for (h = 0; h < out_height; h++) {
+    // select the row to work on
+    lh = l_h[h];
+    ip = input_image + (in_stride * lh);

-        // vp8_filter the row vertically into an temporary buffer.
-        //  If the phase offset == 0 then all the multiplication
-        //  is going to result in the output equalling the input.
-        //  So instead point the temporary buffer to the input.
-        //  Also handle the boundry condition of not being able to
-        //  filter that last lines.
-        if (phase_offset_h && (lh < in_height - 2))
-        {
-            hbuf = g_b_scaler.hbuf;
+    // vp8_filter the row vertically into an temporary buffer.
+    //  If the phase offset == 0 then all the multiplication
+    //  is going to result in the output equalling the input.
+    //  So instead point the temporary buffer to the input.
+    //  Also handle the boundry condition of not being able to
+    //  filter that last lines.
+    if (phase_offset_h && (lh < in_height - 2)) {
+      hbuf = g_b_scaler.hbuf;

-            for (w = 0; w < in_width; w++)
-            {
-                temp_sum =  c_h[phase_offset_h*4+3] * ip[w - in_stride];
-                temp_sum += c_h[phase_offset_h*4+2] * ip[w];
-                temp_sum += c_h[phase_offset_h*4+1] * ip[w + in_stride];
-                temp_sum += c_h[phase_offset_h*4]   * ip[w + 2*in_stride];
+      for (w = 0; w < in_width; w++) {
+        temp_sum =  c_h[phase_offset_h * 4 + 3] * ip[w - in_stride];
+        temp_sum += c_h[phase_offset_h * 4 + 2] * ip[w];
+        temp_sum += c_h[phase_offset_h * 4 + 1] * ip[w + in_stride];
+        temp_sum += c_h[phase_offset_h * 4]   * ip[w + 2 * in_stride];

-                hbuf[w] = temp_sum >> 12;
-            }
-        }
-        else
-            hbuf = ip;
+        hbuf[w] = temp_sum >> 12;
+      }
+    } else
+      hbuf = ip;

-        // increase the phase offset for the next time around.
-        if (++phase_offset_h >= g_b_scaler.nh)
-            phase_offset_h = 0;
+    // increase the phase offset for the next time around.
+    if (++phase_offset_h >= g_b_scaler.nh)
+      phase_offset_h = 0;

-        // now filter and expand it horizontally into the final
-        //  output buffer
+    // now filter and expand it horizontally into the final
+    //  output buffer
+    phase_offset_w = 0;
+
+    for (w = 0; w < out_width; w++) {
+      // get the index to use to expand the image
+      lw = l_w[w];
+
+      temp_sum =  c_w[phase_offset_w * 4]   * hbuf[lw - 1];
+      temp_sum += c_w[phase_offset_w * 4 + 1] * hbuf[lw];
+      temp_sum += c_w[phase_offset_w * 4 + 2] * hbuf[lw + 1];
+      temp_sum += c_w[phase_offset_w * 4 + 3] * hbuf[lw + 2];
+      temp_sum = temp_sum >> 12;
+
+      if (++phase_offset_w >= g_b_scaler.nw)
        phase_offset_w = 0;

-        for (w = 0; w < out_width; w++)
-        {
-            // get the index to use to expand the image
-            lw = l_w[w];
+      // boundry conditions
+      if ((lw + 2) >= in_width)
+        temp_sum = hbuf[lw];

-            temp_sum =  c_w[phase_offset_w*4]   * hbuf[lw - 1];
-            temp_sum += c_w[phase_offset_w*4+1] * hbuf[lw];
-            temp_sum += c_w[phase_offset_w*4+2] * hbuf[lw + 1];
-            temp_sum += c_w[phase_offset_w*4+3] * hbuf[lw + 2];
-            temp_sum = temp_sum >> 12;
+      if (lw == 0)
+        temp_sum = hbuf[0];

-            if (++phase_offset_w >= g_b_scaler.nw)
-                phase_offset_w = 0;
-
-            // boundry conditions
-            if ((lw + 2) >= in_width)
-                temp_sum = hbuf[lw];
-
-            if (lw == 0)
-                temp_sum = hbuf[0];
-
-            op[w] = temp_sum;
-        }
-
-        op += out_stride;
+      op[w] = temp_sum;
    }

-    return 0;
+    op += out_stride;
+  }
+
+  return 0;
 }

-void bicubic_scale_frame_reset()
-{
-    g_b_scaler.out_width = 0;
-    g_b_scaler.out_height = 0;
+void bicubic_scale_frame_reset() {
+  g_b_scaler.out_width = 0;
+  g_b_scaler.out_height = 0;
 }

 void bicubic_scale_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst,
-                         int new_width, int new_height)
-{
+                         int new_width, int new_height) {

-    dst->y_width = new_width;
-    dst->y_height = new_height;
-    dst->uv_width = new_width / 2;
-    dst->uv_height = new_height / 2;
+  dst->y_width = new_width;
+  dst->y_height = new_height;
+  dst->uv_width = new_width / 2;
+  dst->uv_height = new_height / 2;

-    dst->y_stride = dst->y_width;
-    dst->uv_stride = dst->uv_width;
+  dst->y_stride = dst->y_width;
+  dst->uv_stride = dst->uv_width;

-    bicubic_scale(src->y_width, src->y_height, src->y_stride,
-                  new_width, new_height, dst->y_stride,
-                  src->y_buffer, dst->y_buffer);
+  bicubic_scale(src->y_width, src->y_height, src->y_stride,
+                new_width, new_height, dst->y_stride,
+                src->y_buffer, dst->y_buffer);

-    bicubic_scale(src->uv_width, src->uv_height, src->uv_stride,
-                  new_width / 2, new_height / 2, dst->uv_stride,
-                  src->u_buffer, dst->u_buffer);
+  bicubic_scale(src->uv_width, src->uv_height, src->uv_stride,
+                new_width / 2, new_height / 2, dst->uv_stride,
+                src->u_buffer, dst->u_buffer);

-    bicubic_scale(src->uv_width, src->uv_height, src->uv_stride,
-                  new_width / 2, new_height / 2, dst->uv_stride,
-                  src->v_buffer, dst->v_buffer);
+  bicubic_scale(src->uv_width, src->uv_height, src->uv_stride,
+                new_width / 2, new_height / 2, dst->uv_stride,
+                src->v_buffer, dst->v_buffer);
 }
--- a/vpx_scale/generic/gen_scalers.c
+++ b/vpx_scale/generic/gen_scalers.c
--- a/vpx_scale/generic/vpxscale.c
+++ b/vpx_scale/generic/vpxscale.c
--- a/vpx_scale/generic/yv12config.c
+++ b/vpx_scale/generic/yv12config.c
@ -20,81 +20,73 @@
 *
 ****************************************************************************/
 int
-vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf)
-{
-    if (ybf)
-    {
-        vpx_free(ybf->buffer_alloc);
+vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf) {
+  if (ybf) {
+    vpx_free(ybf->buffer_alloc);

-        /* buffer_alloc isn't accessed by most functions.  Rather y_buffer,
-          u_buffer and v_buffer point to buffer_alloc and are used.  Clear out
-          all of this so that a freed pointer isn't inadvertently used */
-        vpx_memset (ybf, 0, sizeof (YV12_BUFFER_CONFIG));
-    }
-    else
-    {
-        return -1;
-    }
+    /* buffer_alloc isn't accessed by most functions.  Rather y_buffer,
+      u_buffer and v_buffer point to buffer_alloc and are used.  Clear out
+      all of this so that a freed pointer isn't inadvertently used */
+    vpx_memset(ybf, 0, sizeof(YV12_BUFFER_CONFIG));
+  } else {
+    return -1;
+  }

-    return 0;
+  return 0;
 }

 /****************************************************************************
 *
 ****************************************************************************/
 int
-vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border)
-{
-/*NOTE:*/
+vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border) {
+  /*NOTE:*/

-    if (ybf)
-    {
-        int y_stride = ((width + 2 * border) + 31) & ~31;
-        int yplane_size = (height + 2 * border) * y_stride;
-        int uv_width = width >> 1;
-        int uv_height = height >> 1;
-        /** There is currently a bunch of code which assumes
-          *  uv_stride == y_stride/2, so enforce this here. */
-        int uv_stride = y_stride >> 1;
-        int uvplane_size = (uv_height + border) * uv_stride;
+  if (ybf) {
+    int y_stride = ((width + 2 * border) + 31) & ~31;
+    int yplane_size = (height + 2 * border) * y_stride;
+    int uv_width = width >> 1;
+    int uv_height = height >> 1;
+    /** There is currently a bunch of code which assumes
+      *  uv_stride == y_stride/2, so enforce this here. */
+    int uv_stride = y_stride >> 1;
+    int uvplane_size = (uv_height + border) * uv_stride;

-        vp8_yv12_de_alloc_frame_buffer(ybf);
+    vp8_yv12_de_alloc_frame_buffer(ybf);

-        /** Only support allocating buffers that have a height and width that
-          *  are multiples of 16, and a border that's a multiple of 32.
-          * The border restriction is required to get 16-byte alignment of the
-          *  start of the chroma rows without intoducing an arbitrary gap
-          *  between planes, which would break the semantics of things like
-          *  vpx_img_set_rect(). */
-        if ((width & 0xf) | (height & 0xf) | (border & 0x1f))
-            return -3;
+    /** Only support allocating buffers that have a height and width that
+      *  are multiples of 16, and a border that's a multiple of 32.
+      * The border restriction is required to get 16-byte alignment of the
+      *  start of the chroma rows without introducing an arbitrary gap
+      *  between planes, which would break the semantics of things like
+      *  vpx_img_set_rect(). */
+    if ((width & 0xf) | (height & 0xf) | (border & 0x1f))
+      return -3;

-        ybf->y_width  = width;
-        ybf->y_height = height;
-        ybf->y_stride = y_stride;
+    ybf->y_width  = width;
+    ybf->y_height = height;
+    ybf->y_stride = y_stride;

-        ybf->uv_width = uv_width;
-        ybf->uv_height = uv_height;
-        ybf->uv_stride = uv_stride;
+    ybf->uv_width = uv_width;
+    ybf->uv_height = uv_height;
+    ybf->uv_stride = uv_stride;

-        ybf->border = border;
-        ybf->frame_size = yplane_size + 2 * uvplane_size;
+    ybf->border = border;
+    ybf->frame_size = yplane_size + 2 * uvplane_size;

-        ybf->buffer_alloc = (unsigned char *) vpx_memalign(32, ybf->frame_size);
+    ybf->buffer_alloc = (unsigned char *) vpx_memalign(32, ybf->frame_size);

-        if (ybf->buffer_alloc == NULL)
-            return -1;
+    if (ybf->buffer_alloc == NULL)
+      return -1;

-        ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border;
-        ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2  * uv_stride) + border / 2;
-        ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2  * uv_stride) + border / 2;
+    ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border;
+    ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2  * uv_stride) + border / 2;
+    ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2  * uv_stride) + border / 2;

-        ybf->corrupted = 0; /* assume not currupted by errors */
-    }
-    else
-    {
-        return -2;
-    }
+    ybf->corrupted = 0; /* assume not corrupted by errors */
+  } else {
+    return -2;
+  }

-    return 0;
+  return 0;
 }
--- a/vpx_scale/generic/yv12extend.c
+++ b/vpx_scale/generic/yv12extend.c
@ -21,184 +21,174 @@
 *
 ****************************************************************************/
 void
-vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf)
-{
-    int i;
-    unsigned char *src_ptr1, *src_ptr2;
-    unsigned char *dest_ptr1, *dest_ptr2;
+vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) {
+  int i;
+  unsigned char *src_ptr1, *src_ptr2;
+  unsigned char *dest_ptr1, *dest_ptr2;

-    unsigned int Border;
-    int plane_stride;
-    int plane_height;
-    int plane_width;
+  unsigned int Border;
+  int plane_stride;
+  int plane_height;
+  int plane_width;

-    /***********/
-    /* Y Plane */
-    /***********/
-    Border = ybf->border;
-    plane_stride = ybf->y_stride;
-    plane_height = ybf->y_height;
-    plane_width = ybf->y_width;
+  /***********/
+  /* Y Plane */
+  /***********/
+  Border = ybf->border;
+  plane_stride = ybf->y_stride;
+  plane_height = ybf->y_height;
+  plane_width = ybf->y_width;

-    /* copy the left and right most columns out */
-    src_ptr1 = ybf->y_buffer;
-    src_ptr2 = src_ptr1 + plane_width - 1;
-    dest_ptr1 = src_ptr1 - Border;
-    dest_ptr2 = src_ptr2 + 1;
+  /* copy the left and right most columns out */
+  src_ptr1 = ybf->y_buffer;
+  src_ptr2 = src_ptr1 + plane_width - 1;
+  dest_ptr1 = src_ptr1 - Border;
+  dest_ptr2 = src_ptr2 + 1;

-    for (i = 0; i < plane_height; i++)
-    {
-        vpx_memset(dest_ptr1, src_ptr1[0], Border);
-        vpx_memset(dest_ptr2, src_ptr2[0], Border);
-        src_ptr1  += plane_stride;
-        src_ptr2  += plane_stride;
-        dest_ptr1 += plane_stride;
-        dest_ptr2 += plane_stride;
-    }
+  for (i = 0; i < plane_height; i++) {
+    vpx_memset(dest_ptr1, src_ptr1[0], Border);
+    vpx_memset(dest_ptr2, src_ptr2[0], Border);
+    src_ptr1  += plane_stride;
+    src_ptr2  += plane_stride;
+    dest_ptr1 += plane_stride;
+    dest_ptr2 += plane_stride;
+  }

-    /* Now copy the top and bottom source lines into each line of the respective borders */
-    src_ptr1 = ybf->y_buffer - Border;
-    src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
-    dest_ptr1 = src_ptr1 - (Border * plane_stride);
-    dest_ptr2 = src_ptr2 + plane_stride;
+  /* Now copy the top and bottom source lines into each line of the respective borders */
+  src_ptr1 = ybf->y_buffer - Border;
+  src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
+  dest_ptr1 = src_ptr1 - (Border * plane_stride);
+  dest_ptr2 = src_ptr2 + plane_stride;

-    for (i = 0; i < (int)Border; i++)
-    {
-        vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
-        vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
-        dest_ptr1 += plane_stride;
-        dest_ptr2 += plane_stride;
-    }
+  for (i = 0; i < (int)Border; i++) {
+    vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
+    vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
+    dest_ptr1 += plane_stride;
+    dest_ptr2 += plane_stride;
+  }


-    /***********/
-    /* U Plane */
-    /***********/
-    plane_stride = ybf->uv_stride;
-    plane_height = ybf->uv_height;
-    plane_width = ybf->uv_width;
-    Border /= 2;
+  /***********/
+  /* U Plane */
+  /***********/
+  plane_stride = ybf->uv_stride;
+  plane_height = ybf->uv_height;
+  plane_width = ybf->uv_width;
+  Border /= 2;

-    /* copy the left and right most columns out */
-    src_ptr1 = ybf->u_buffer;
-    src_ptr2 = src_ptr1 + plane_width - 1;
-    dest_ptr1 = src_ptr1 - Border;
-    dest_ptr2 = src_ptr2 + 1;
+  /* copy the left and right most columns out */
+  src_ptr1 = ybf->u_buffer;
+  src_ptr2 = src_ptr1 + plane_width - 1;
+  dest_ptr1 = src_ptr1 - Border;
+  dest_ptr2 = src_ptr2 + 1;

-    for (i = 0; i < plane_height; i++)
-    {
-        vpx_memset(dest_ptr1, src_ptr1[0], Border);
-        vpx_memset(dest_ptr2, src_ptr2[0], Border);
-        src_ptr1  += plane_stride;
-        src_ptr2  += plane_stride;
-        dest_ptr1 += plane_stride;
-        dest_ptr2 += plane_stride;
-    }
+  for (i = 0; i < plane_height; i++) {
+    vpx_memset(dest_ptr1, src_ptr1[0], Border);
+    vpx_memset(dest_ptr2, src_ptr2[0], Border);
+    src_ptr1  += plane_stride;
+    src_ptr2  += plane_stride;
+    dest_ptr1 += plane_stride;
+    dest_ptr2 += plane_stride;
+  }

-    /* Now copy the top and bottom source lines into each line of the respective borders */
-    src_ptr1 = ybf->u_buffer - Border;
-    src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
-    dest_ptr1 = src_ptr1 - (Border * plane_stride);
-    dest_ptr2 = src_ptr2 + plane_stride;
+  /* Now copy the top and bottom source lines into each line of the respective borders */
+  src_ptr1 = ybf->u_buffer - Border;
+  src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
+  dest_ptr1 = src_ptr1 - (Border * plane_stride);
+  dest_ptr2 = src_ptr2 + plane_stride;

-    for (i = 0; i < (int)(Border); i++)
-    {
-        vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
-        vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
-        dest_ptr1 += plane_stride;
-        dest_ptr2 += plane_stride;
-    }
+  for (i = 0; i < (int)(Border); i++) {
+    vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
+    vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
+    dest_ptr1 += plane_stride;
+    dest_ptr2 += plane_stride;
+  }

-    /***********/
-    /* V Plane */
-    /***********/
+  /***********/
+  /* V Plane */
+  /***********/

-    /* copy the left and right most columns out */
-    src_ptr1 = ybf->v_buffer;
-    src_ptr2 = src_ptr1 + plane_width - 1;
-    dest_ptr1 = src_ptr1 - Border;
-    dest_ptr2 = src_ptr2 + 1;
+  /* copy the left and right most columns out */
+  src_ptr1 = ybf->v_buffer;
+  src_ptr2 = src_ptr1 + plane_width - 1;
+  dest_ptr1 = src_ptr1 - Border;
+  dest_ptr2 = src_ptr2 + 1;

-    for (i = 0; i < plane_height; i++)
-    {
-        vpx_memset(dest_ptr1, src_ptr1[0], Border);
-        vpx_memset(dest_ptr2, src_ptr2[0], Border);
-        src_ptr1  += plane_stride;
-        src_ptr2  += plane_stride;
-        dest_ptr1 += plane_stride;
-        dest_ptr2 += plane_stride;
-    }
+  for (i = 0; i < plane_height; i++) {
+    vpx_memset(dest_ptr1, src_ptr1[0], Border);
+    vpx_memset(dest_ptr2, src_ptr2[0], Border);
+    src_ptr1  += plane_stride;
+    src_ptr2  += plane_stride;
+    dest_ptr1 += plane_stride;
+    dest_ptr2 += plane_stride;
+  }

-    /* Now copy the top and bottom source lines into each line of the respective borders */
-    src_ptr1 = ybf->v_buffer - Border;
-    src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
-    dest_ptr1 = src_ptr1 - (Border * plane_stride);
-    dest_ptr2 = src_ptr2 + plane_stride;
+  /* Now copy the top and bottom source lines into each line of the respective borders */
+  src_ptr1 = ybf->v_buffer - Border;
+  src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
+  dest_ptr1 = src_ptr1 - (Border * plane_stride);
+  dest_ptr2 = src_ptr2 + plane_stride;

-    for (i = 0; i < (int)(Border); i++)
-    {
-        vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
-        vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
-        dest_ptr1 += plane_stride;
-        dest_ptr2 += plane_stride;
-    }
+  for (i = 0; i < (int)(Border); i++) {
+    vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
+    vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
+    dest_ptr1 += plane_stride;
+    dest_ptr2 += plane_stride;
+  }
 }


 static void
-extend_frame_borders_yonly_c(YV12_BUFFER_CONFIG *ybf)
-{
-    int i;
-    unsigned char *src_ptr1, *src_ptr2;
-    unsigned char *dest_ptr1, *dest_ptr2;
+extend_frame_borders_yonly_c(YV12_BUFFER_CONFIG *ybf) {
+  int i;
+  unsigned char *src_ptr1, *src_ptr2;
+  unsigned char *dest_ptr1, *dest_ptr2;

-    unsigned int Border;
-    int plane_stride;
-    int plane_height;
-    int plane_width;
+  unsigned int Border;
+  int plane_stride;
+  int plane_height;
+  int plane_width;

-    /***********/
-    /* Y Plane */
-    /***********/
-    Border = ybf->border;
-    plane_stride = ybf->y_stride;
-    plane_height = ybf->y_height;
-    plane_width = ybf->y_width;
+  /***********/
+  /* Y Plane */
+  /***********/
+  Border = ybf->border;
+  plane_stride = ybf->y_stride;
+  plane_height = ybf->y_height;
+  plane_width = ybf->y_width;

-    /* copy the left and right most columns out */
-    src_ptr1 = ybf->y_buffer;
-    src_ptr2 = src_ptr1 + plane_width - 1;
-    dest_ptr1 = src_ptr1 - Border;
-    dest_ptr2 = src_ptr2 + 1;
+  /* copy the left and right most columns out */
+  src_ptr1 = ybf->y_buffer;
+  src_ptr2 = src_ptr1 + plane_width - 1;
+  dest_ptr1 = src_ptr1 - Border;
+  dest_ptr2 = src_ptr2 + 1;

-    for (i = 0; i < plane_height; i++)
-    {
-        vpx_memset(dest_ptr1, src_ptr1[0], Border);
-        vpx_memset(dest_ptr2, src_ptr2[0], Border);
-        src_ptr1  += plane_stride;
-        src_ptr2  += plane_stride;
-        dest_ptr1 += plane_stride;
-        dest_ptr2 += plane_stride;
-    }
+  for (i = 0; i < plane_height; i++) {
+    vpx_memset(dest_ptr1, src_ptr1[0], Border);
+    vpx_memset(dest_ptr2, src_ptr2[0], Border);
+    src_ptr1  += plane_stride;
+    src_ptr2  += plane_stride;
+    dest_ptr1 += plane_stride;
+    dest_ptr2 += plane_stride;
+  }

-    /* Now copy the top and bottom source lines into each line of the respective borders */
-    src_ptr1 = ybf->y_buffer - Border;
-    src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
-    dest_ptr1 = src_ptr1 - (Border * plane_stride);
-    dest_ptr2 = src_ptr2 + plane_stride;
+  /* Now copy the top and bottom source lines into each line of the respective borders */
+  src_ptr1 = ybf->y_buffer - Border;
+  src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
+  dest_ptr1 = src_ptr1 - (Border * plane_stride);
+  dest_ptr2 = src_ptr2 + plane_stride;

-    for (i = 0; i < (int)Border; i++)
-    {
-        vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
-        vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
-        dest_ptr1 += plane_stride;
-        dest_ptr2 += plane_stride;
-    }
+  for (i = 0; i < (int)Border; i++) {
+    vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
+    vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
+    dest_ptr1 += plane_stride;
+    dest_ptr2 += plane_stride;
+  }

-    plane_stride /= 2;
-    plane_height /= 2;
-    plane_width /= 2;
-    Border /= 2;
+  plane_stride /= 2;
+  plane_height /= 2;
+  plane_width /= 2;
+  Border /= 2;

 }

@ -221,57 +211,53 @@ extend_frame_borders_yonly_c(YV12_BUFFER_CONFIG *ybf)
 *
 ****************************************************************************/
 void
-vp8_yv12_copy_frame_c(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc)
-{
-    int row;
-    unsigned char *source, *dest;
+vp8_yv12_copy_frame_c(YV12_BUFFER_CONFIG *src_ybc,
+                      YV12_BUFFER_CONFIG *dst_ybc) {
+  int row;
+  unsigned char *source, *dest;

-    source = src_ybc->y_buffer;
-    dest = dst_ybc->y_buffer;
+  source = src_ybc->y_buffer;
+  dest = dst_ybc->y_buffer;

-    for (row = 0; row < src_ybc->y_height; row++)
-    {
-        vpx_memcpy(dest, source, src_ybc->y_width);
-        source += src_ybc->y_stride;
-        dest   += dst_ybc->y_stride;
-    }
+  for (row = 0; row < src_ybc->y_height; row++) {
+    vpx_memcpy(dest, source, src_ybc->y_width);
+    source += src_ybc->y_stride;
+    dest   += dst_ybc->y_stride;
+  }

-    source = src_ybc->u_buffer;
-    dest = dst_ybc->u_buffer;
+  source = src_ybc->u_buffer;
+  dest = dst_ybc->u_buffer;

-    for (row = 0; row < src_ybc->uv_height; row++)
-    {
-        vpx_memcpy(dest, source, src_ybc->uv_width);
-        source += src_ybc->uv_stride;
-        dest   += dst_ybc->uv_stride;
-    }
+  for (row = 0; row < src_ybc->uv_height; row++) {
+    vpx_memcpy(dest, source, src_ybc->uv_width);
+    source += src_ybc->uv_stride;
+    dest   += dst_ybc->uv_stride;
+  }

-    source = src_ybc->v_buffer;
-    dest = dst_ybc->v_buffer;
+  source = src_ybc->v_buffer;
+  dest = dst_ybc->v_buffer;

-    for (row = 0; row < src_ybc->uv_height; row++)
-    {
-        vpx_memcpy(dest, source, src_ybc->uv_width);
-        source += src_ybc->uv_stride;
-        dest   += dst_ybc->uv_stride;
-    }
+  for (row = 0; row < src_ybc->uv_height; row++) {
+    vpx_memcpy(dest, source, src_ybc->uv_width);
+    source += src_ybc->uv_stride;
+    dest   += dst_ybc->uv_stride;
+  }

-    vp8_yv12_extend_frame_borders_c(dst_ybc);
+  vp8_yv12_extend_frame_borders_c(dst_ybc);
 }

-void vp8_yv12_copy_y_c(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc)
-{
-    int row;
-    unsigned char *source, *dest;
+void vp8_yv12_copy_y_c(YV12_BUFFER_CONFIG *src_ybc,
+                       YV12_BUFFER_CONFIG *dst_ybc) {
+  int row;
+  unsigned char *source, *dest;


-    source = src_ybc->y_buffer;
-    dest = dst_ybc->y_buffer;
+  source = src_ybc->y_buffer;
+  dest = dst_ybc->y_buffer;

-    for (row = 0; row < src_ybc->y_height; row++)
-    {
-        vpx_memcpy(dest, source, src_ybc->y_width);
-        source += src_ybc->y_stride;
-        dest   += dst_ybc->y_stride;
-    }
+  for (row = 0; row < src_ybc->y_height; row++) {
+    vpx_memcpy(dest, source, src_ybc->y_width);
+    source += src_ybc->y_stride;
+    dest   += dst_ybc->y_stride;
+  }
 }
--- a/vpx_scale/include/generic/vpxscale_arbitrary.h
+++ b/vpx_scale/include/generic/vpxscale_arbitrary.h
@ -14,33 +14,32 @@

 #include "vpx_scale/yv12config.h"

-typedef struct
-{
-    int in_width;
-    int in_height;
+typedef struct {
+  int in_width;
+  int in_height;

-    int out_width;
-    int out_height;
-    int max_usable_out_width;
+  int out_width;
+  int out_height;
+  int max_usable_out_width;

-    // numerator for the width and height
-    int nw;
-    int nh;
-    int nh_uv;
+  // numerator for the width and height
+  int nw;
+  int nh;
+  int nh_uv;

-    // output to input correspondance array
-    short *l_w;
-    short *l_h;
-    short *l_h_uv;
+  // output to input correspondence array
+  short *l_w;
+  short *l_h;
+  short *l_h_uv;

-    // polyphase coefficients
-    short *c_w;
-    short *c_h;
-    short *c_h_uv;
+  // polyphase coefficients
+  short *c_w;
+  short *c_h;
+  short *c_h_uv;

-    // buffer for horizontal filtering.
-    unsigned char *hbuf;
-    unsigned char *hbuf_uv;
+  // buffer for horizontal filtering.
+  unsigned char *hbuf;
+  unsigned char *hbuf_uv;
 } BICUBIC_SCALER_STRUCT;

 int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height);
--- a/vpx_scale/scale_mode.h
+++ b/vpx_scale/scale_mode.h
@ -17,12 +17,11 @@
 #ifndef SCALE_MODE_H
 #define SCALE_MODE_H

-typedef enum
-{
-    MAINTAIN_ASPECT_RATIO   = 0x0,
-    SCALE_TO_FIT            = 0x1,
-    CENTER                  = 0x2,
-    OTHER                   = 0x3
+typedef enum {
+  MAINTAIN_ASPECT_RATIO   = 0x0,
+  SCALE_TO_FIT            = 0x1,
+  CENTER                  = 0x2,
+  OTHER                   = 0x3
 } SCALE_MODE;


--- a/vpx_scale/vpxscale.h
+++ b/vpx_scale/vpxscale.h
@ -14,29 +14,24 @@

 #include "vpx_scale/yv12config.h"

-extern void vp8_yv12_scale_or_center
-(
-    YV12_BUFFER_CONFIG *src_yuv_config,
-    YV12_BUFFER_CONFIG *dst_yuv_config,
-    int expanded_frame_width,
-    int expanded_frame_height,
-    int scaling_mode,
-    int HScale,
-    int HRatio,
-    int VScale,
-    int VRatio
-);
-extern void vp8_scale_frame
-(
-    YV12_BUFFER_CONFIG *src,
-    YV12_BUFFER_CONFIG *dst,
-    unsigned char *temp_area,
-    unsigned char temp_height,
-    unsigned int hscale,
-    unsigned int hratio,
-    unsigned int vscale,
-    unsigned int vratio,
-    unsigned int interlaced
-);
+extern void vp8_yv12_scale_or_center(YV12_BUFFER_CONFIG *src_yuv_config,
+                                     YV12_BUFFER_CONFIG *dst_yuv_config,
+                                     int expanded_frame_width,
+                                     int expanded_frame_height,
+                                     int scaling_mode,
+                                     int HScale,
+                                     int HRatio,
+                                     int VScale,
+                                     int VRatio);
+
+extern void vp8_scale_frame(YV12_BUFFER_CONFIG *src,
+                            YV12_BUFFER_CONFIG *dst,
+                            unsigned char *temp_area,
+                            unsigned char temp_height,
+                            unsigned int hscale,
+                            unsigned int hratio,
+                            unsigned int vscale,
+                            unsigned int vratio,
+                            unsigned int interlaced);

 #endif
--- a/vpx_scale/win32/scaleopt.c
+++ b/vpx_scale/win32/scaleopt.c
--- a/vpx_scale/win32/scalesystemdependent.c
+++ b/vpx_scale/win32/scalesystemdependent.c
@ -46,46 +46,42 @@ extern void register_mmxscalers(void);
 *
 ****************************************************************************/
 void
-vp8_scale_machine_specific_config(void)
-{
-    // If MMX supported then set to use MMX versions of functions else
-    // use original 'C' versions.
-    int mmx_enabled;
-    int xmm_enabled;
-    int wmt_enabled;
+vp8_scale_machine_specific_config(void) {
+  // If MMX supported then set to use MMX versions of functions else
+  // use original 'C' versions.
+  int mmx_enabled;
+  int xmm_enabled;
+  int wmt_enabled;

-    vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled);
+  vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled);

-    if (mmx_enabled || xmm_enabled || wmt_enabled)
-    {
-        register_mmxscalers();
-    }
-    else
-    {
-        vp8_horizontal_line_1_2_scale        = vp8cx_horizontal_line_1_2_scale_c;
-        vp8_vertical_band_1_2_scale          = vp8cx_vertical_band_1_2_scale_c;
-        vp8_last_vertical_band_1_2_scale      = vp8cx_last_vertical_band_1_2_scale_c;
-        vp8_horizontal_line_3_5_scale        = vp8cx_horizontal_line_3_5_scale_c;
-        vp8_vertical_band_3_5_scale          = vp8cx_vertical_band_3_5_scale_c;
-        vp8_last_vertical_band_3_5_scale      = vp8cx_last_vertical_band_3_5_scale_c;
-        vp8_horizontal_line_3_4_scale        = vp8cx_horizontal_line_3_4_scale_c;
-        vp8_vertical_band_3_4_scale          = vp8cx_vertical_band_3_4_scale_c;
-        vp8_last_vertical_band_3_4_scale      = vp8cx_last_vertical_band_3_4_scale_c;
-        vp8_horizontal_line_2_3_scale        = vp8cx_horizontal_line_2_3_scale_c;
-        vp8_vertical_band_2_3_scale          = vp8cx_vertical_band_2_3_scale_c;
-        vp8_last_vertical_band_2_3_scale      = vp8cx_last_vertical_band_2_3_scale_c;
-        vp8_horizontal_line_4_5_scale        = vp8cx_horizontal_line_4_5_scale_c;
-        vp8_vertical_band_4_5_scale          = vp8cx_vertical_band_4_5_scale_c;
-        vp8_last_vertical_band_4_5_scale      = vp8cx_last_vertical_band_4_5_scale_c;
+  if (mmx_enabled || xmm_enabled || wmt_enabled) {
+    register_mmxscalers();
+  } else {
+    vp8_horizontal_line_1_2_scale        = vp8cx_horizontal_line_1_2_scale_c;
+    vp8_vertical_band_1_2_scale          = vp8cx_vertical_band_1_2_scale_c;
+    vp8_last_vertical_band_1_2_scale      = vp8cx_last_vertical_band_1_2_scale_c;
+    vp8_horizontal_line_3_5_scale        = vp8cx_horizontal_line_3_5_scale_c;
+    vp8_vertical_band_3_5_scale          = vp8cx_vertical_band_3_5_scale_c;
+    vp8_last_vertical_band_3_5_scale      = vp8cx_last_vertical_band_3_5_scale_c;
+    vp8_horizontal_line_3_4_scale        = vp8cx_horizontal_line_3_4_scale_c;
+    vp8_vertical_band_3_4_scale          = vp8cx_vertical_band_3_4_scale_c;
+    vp8_last_vertical_band_3_4_scale      = vp8cx_last_vertical_band_3_4_scale_c;
+    vp8_horizontal_line_2_3_scale        = vp8cx_horizontal_line_2_3_scale_c;
+    vp8_vertical_band_2_3_scale          = vp8cx_vertical_band_2_3_scale_c;
+    vp8_last_vertical_band_2_3_scale      = vp8cx_last_vertical_band_2_3_scale_c;
+    vp8_horizontal_line_4_5_scale        = vp8cx_horizontal_line_4_5_scale_c;
+    vp8_vertical_band_4_5_scale          = vp8cx_vertical_band_4_5_scale_c;
+    vp8_last_vertical_band_4_5_scale      = vp8cx_last_vertical_band_4_5_scale_c;


-        vp8_vertical_band_5_4_scale           = vp8cx_vertical_band_5_4_scale_c;
-        vp8_vertical_band_5_3_scale           = vp8cx_vertical_band_5_3_scale_c;
-        vp8_vertical_band_2_1_scale           = vp8cx_vertical_band_2_1_scale_c;
-        vp8_vertical_band_2_1_scale_i         = vp8cx_vertical_band_2_1_scale_i_c;
-        vp8_horizontal_line_2_1_scale         = vp8cx_horizontal_line_2_1_scale_c;
-        vp8_horizontal_line_5_3_scale         = vp8cx_horizontal_line_5_3_scale_c;
-        vp8_horizontal_line_5_4_scale         = vp8cx_horizontal_line_5_4_scale_c;
+    vp8_vertical_band_5_4_scale           = vp8cx_vertical_band_5_4_scale_c;
+    vp8_vertical_band_5_3_scale           = vp8cx_vertical_band_5_3_scale_c;
+    vp8_vertical_band_2_1_scale           = vp8cx_vertical_band_2_1_scale_c;
+    vp8_vertical_band_2_1_scale_i         = vp8cx_vertical_band_2_1_scale_i_c;
+    vp8_horizontal_line_2_1_scale         = vp8cx_horizontal_line_2_1_scale_c;
+    vp8_horizontal_line_5_3_scale         = vp8cx_horizontal_line_5_3_scale_c;
+    vp8_horizontal_line_5_4_scale         = vp8cx_horizontal_line_5_4_scale_c;

-    }
+  }
 }
--- a/vpx_scale/yv12config.h
+++ b/vpx_scale/yv12config.h
@ -16,54 +16,54 @@ extern "C"
 {
 #endif

-#define VP7BORDERINPIXELS       48
 #define VP8BORDERINPIXELS       32
+#define VP9BORDERINPIXELS       64
+#define VP9_INTERP_EXTEND        4

-    /*************************************
-     For INT_YUV:
+  /*************************************
+   For INT_YUV:

-     Y = (R+G*2+B)/4;
-     U = (R-B)/2;
-     V =  (G*2 - R - B)/4;
-    And
-     R = Y+U-V;
-     G = Y+V;
-     B = Y-U-V;
-    ************************************/
-    typedef enum
-    {
-        REG_YUV = 0,    /* Regular yuv */
-        INT_YUV = 1     /* The type of yuv that can be tranfer to and from RGB through integer transform */
-              }
-              YUV_TYPE;
+   Y = (R+G*2+B)/4;
+   U = (R-B)/2;
+   V =  (G*2 - R - B)/4;
+  And
+   R = Y+U-V;
+   G = Y+V;
+   B = Y-U-V;
+  ************************************/
+  typedef enum
+  {
+    REG_YUV = 0,    /* Regular yuv */
+    INT_YUV = 1     /* The type of yuv that can be transferred to and from RGB through integer transform */
+  }
+            YUV_TYPE;

-    typedef struct yv12_buffer_config
-    {
-        int   y_width;
-        int   y_height;
-        int   y_stride;
-/*    int   yinternal_width; */
+  typedef struct yv12_buffer_config {
+    int   y_width;
+    int   y_height;
+    int   y_stride;
+    /*    int   yinternal_width; */

-        int   uv_width;
-        int   uv_height;
-        int   uv_stride;
-/*    int   uvinternal_width; */
+    int   uv_width;
+    int   uv_height;
+    int   uv_stride;
+    /*    int   uvinternal_width; */

-        unsigned char *y_buffer;
-        unsigned char *u_buffer;
-        unsigned char *v_buffer;
+    unsigned char *y_buffer;
+    unsigned char *u_buffer;
+    unsigned char *v_buffer;

-        unsigned char *buffer_alloc;
-        int border;
-        int frame_size;
-        YUV_TYPE clrtype;
+    unsigned char *buffer_alloc;
+    int border;
+    int frame_size;
+    YUV_TYPE clrtype;

-        int corrupted;
-        int flags;
-    } YV12_BUFFER_CONFIG;
+    int corrupted;
+    int flags;
+  } YV12_BUFFER_CONFIG;

-    int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border);
-    int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf);
+  int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border);
+  int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf);

 #ifdef __cplusplus
 }